# --- T2-COPYRIGHT-NOTE-BEGIN --- # T2 SDE: package/*/linux/ia64.patch # Copyright (C) 2024 The T2 SDE Project # # This Copyright note is generated by scripts/Create-CopyPatch, # more information can be found in the files COPYING and README. # # This patch file is dual-licensed. It is available under the license the # patched project is licensed under, as long as it is an OpenSource license # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms # of the GNU General Public License version 2 as used by the T2 SDE. # --- T2-COPYRIGHT-NOTE-END --- Our IA-64 revert tree at: https://github.com/lenticularis39/linux-ia64/ diff --git a/Documentation/ABI/testing/sysfs-devices-system-cpu b/Documentation/ABI/testing/sysfs-devices-system-cpu index a1db6db47505..7ecd5c8161a6 100644 --- a/Documentation/ABI/testing/sysfs-devices-system-cpu +++ b/Documentation/ABI/testing/sysfs-devices-system-cpu @@ -354,6 +354,9 @@ Description: Parameters for the CPU cache attributes - ReadWriteAllocate: both writeallocate and readallocate + attributes: + LEGACY used only on IA64 and is same as write_policy + coherency_line_size: the minimum amount of data in bytes that gets transferred from memory to cache diff --git a/Documentation/ABI/testing/sysfs-firmware-dmi-entries b/Documentation/ABI/testing/sysfs-firmware-dmi-entries index b6c23807b804..fe0289c87768 100644 --- a/Documentation/ABI/testing/sysfs-firmware-dmi-entries +++ b/Documentation/ABI/testing/sysfs-firmware-dmi-entries @@ -2,7 +2,7 @@ What: /sys/firmware/dmi/entries/ Date: February 2011 Contact: Mike Waychison Description: - Many machines' firmware (x86 and arm64) export DMI / + Many machines' firmware (x86 and ia64) export DMI / SMBIOS tables to the operating system. Getting at this information is often valuable to userland, especially in cases where there are OEM extensions used. diff --git a/Documentation/admin-guide/kdump/kdump.rst b/Documentation/admin-guide/kdump/kdump.rst index 5762e7477a0c..a748e7eb4429 100644 --- a/Documentation/admin-guide/kdump/kdump.rst +++ b/Documentation/admin-guide/kdump/kdump.rst @@ -17,7 +17,7 @@ You can use common commands, such as cp, scp or makedumpfile to copy the memory image to a dump file on the local disk, or across the network to a remote system. -Kdump and kexec are currently supported on the x86, x86_64, ppc64, +Kdump and kexec are currently supported on the x86, x86_64, ppc64, ia64, s390x, arm and arm64 architectures. When the system kernel boots, it reserves a small section of memory for @@ -113,7 +113,7 @@ There are two possible methods of using Kdump. 2) Or use the system kernel binary itself as dump-capture kernel and there is no need to build a separate dump-capture kernel. This is possible only with the architectures which support a relocatable kernel. As - of today, i386, x86_64, ppc64, arm and arm64 architectures support + of today, i386, x86_64, ppc64, ia64, arm and arm64 architectures support relocatable kernel. Building a relocatable kernel is advantageous from the point of view that @@ -236,6 +236,24 @@ Dump-capture kernel config options (Arch Dependent, ppc64) Make and install the kernel and its modules. +Dump-capture kernel config options (Arch Dependent, ia64) +---------------------------------------------------------- + +- No specific options are required to create a dump-capture kernel + for ia64, other than those specified in the arch independent section + above. This means that it is possible to use the system kernel + as a dump-capture kernel if desired. + + The crashkernel region can be automatically placed by the system + kernel at runtime. This is done by specifying the base address as 0, + or omitting it all together:: + + crashkernel=256M@0 + + or:: + + crashkernel=256M + Dump-capture kernel config options (Arch Dependent, arm) ---------------------------------------------------------- @@ -330,6 +348,11 @@ Boot into System Kernel On ppc64, use "crashkernel=128M@32M". + On ia64, 256M@256M is a generous value that typically works. + The region may be automatically placed on ia64, see the + dump-capture kernel config option notes above. + If use sparse memory, the size should be rounded to GRANULE boundaries. + On s390x, typically use "crashkernel=xxM". The value of xx is dependent on the memory consumption of the kdump system. In general this is not dependent on the memory size of the production system. @@ -360,6 +383,10 @@ For ppc64: - Use vmlinux +For ia64: + + - Use vmlinux or vmlinuz.gz + For s390x: - Use image or bzImage @@ -401,10 +428,14 @@ to load dump-capture kernel:: --initrd= \ --append="root= " +Please note, that --args-linux does not need to be specified for ia64. +It is planned to make this a no-op on that architecture, but for now +it should be omitted + Following are the arch specific command line options to be used while loading dump-capture kernel. -For i386 and x86_64: +For i386, x86_64 and ia64: "1 irqpoll nr_cpus=1 reset_devices" diff --git a/Documentation/admin-guide/kdump/vmcoreinfo.rst b/Documentation/admin-guide/kdump/vmcoreinfo.rst index bced9e4b6e08..64ab49ec8fe7 100644 --- a/Documentation/admin-guide/kdump/vmcoreinfo.rst +++ b/Documentation/admin-guide/kdump/vmcoreinfo.rst @@ -413,6 +413,36 @@ of a higher page table lookup overhead, and also consumes more page table space per process. Used to check whether PAE was enabled in the crash kernel when converting virtual addresses to physical addresses. +ia64 +==== + +pgdat_list|(pgdat_list, MAX_NUMNODES) +------------------------------------- + +pg_data_t array storing all NUMA nodes information. MAX_NUMNODES +indicates the number of the nodes. + +node_memblk|(node_memblk, NR_NODE_MEMBLKS) +------------------------------------------ + +List of node memory chunks. Filled when parsing the SRAT table to obtain +information about memory nodes. NR_NODE_MEMBLKS indicates the number of +node memory chunks. + +These values are used to compute the number of nodes the crashed kernel used. + +node_memblk_s|(node_memblk_s, start_paddr)|(node_memblk_s, size) +---------------------------------------------------------------- + +The size of a struct node_memblk_s and the offsets of the +node_memblk_s's members. Used to compute the number of nodes. + +PGTABLE_3|PGTABLE_4 +------------------- + +User-space tools need to know whether the crash kernel was in 3-level or +4-level paging mode. Used to distinguish the page table. + ARM64 ===== diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 31b3a25680d0..844e52ad250b 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1469,7 +1469,7 @@ See comment before function elanfreq_setup() in arch/x86/kernel/cpu/cpufreq/elanfreq.c. - elfcorehdr=[size[KMG]@]offset[KMG] [PPC,SH,X86,S390] + elfcorehdr=[size[KMG]@]offset[KMG] [IA64,PPC,SH,X86,S390] Specifies physical address of start of kernel core image elf header and optionally the size. Generally kexec loader will pass this option to capture kernel. @@ -1532,6 +1532,12 @@ floppy= [HW] See Documentation/admin-guide/blockdev/floppy.rst. + force_pal_cache_flush + [IA-64] Avoid check_sal_cache_flush which may hang on + buggy SAL_CACHE_FLUSH implementations. Using this + parameter will force ia64_sal_cache_flush to call + ia64_pal_cache_flush instead of SAL_CACHE_FLUSH. + forcepae [X86-32] Forcefully enable Physical Address Extension (PAE). Many Pentium M systems disable PAE but may have a diff --git a/Documentation/admin-guide/mm/memory-hotplug.rst b/Documentation/admin-guide/mm/memory-hotplug.rst index 098f14d83e99..aa50b8501c2e 100644 --- a/Documentation/admin-guide/mm/memory-hotplug.rst +++ b/Documentation/admin-guide/mm/memory-hotplug.rst @@ -33,7 +33,7 @@ used to expose persistent memory, other performance-differentiated memory and reserved memory regions as ordinary system RAM to Linux. Linux only supports memory hot(un)plug on selected 64 bit architectures, such as -x86_64, arm64, ppc64 and s390x. +x86_64, arm64, ppc64, s390x and ia64. Memory Hot(Un)Plug Granularity ------------------------------ diff --git a/Documentation/admin-guide/sysctl/kernel.rst b/Documentation/admin-guide/sysctl/kernel.rst index 6584a1f9bfe3..d89ac2bd8dc4 100644 --- a/Documentation/admin-guide/sysctl/kernel.rst +++ b/Documentation/admin-guide/sysctl/kernel.rst @@ -436,7 +436,7 @@ ignore-unaligned-usertrap On architectures where unaligned accesses cause traps, and where this feature is supported (``CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN``; -currently, ``arc`` and ``loongarch``), controls whether all +currently, ``arc``, ``ia64`` and ``loongarch``), controls whether all unaligned traps are logged. = ============================================================= @@ -445,7 +445,10 @@ unaligned traps are logged. setting. = ============================================================= -See also `unaligned-trap`_. +See also `unaligned-trap`_ and `unaligned-dump-stack`_. On ``ia64``, +this allows system administrators to override the +``IA64_THREAD_UAC_NOPRINT`` ``prctl`` and avoid logs being flooded. + io_uring_disabled ================= @@ -1536,6 +1539,22 @@ See Documentation/admin-guide/kernel-parameters.rst and Documentation/trace/boottime-trace.rst. +.. _unaligned-dump-stack: + +unaligned-dump-stack (ia64) +=========================== + +When logging unaligned accesses, controls whether the stack is +dumped. + += =================================================== +0 Do not dump the stack. This is the default setting. +1 Dump the stack. += =================================================== + +See also `ignore-unaligned-usertrap`_. + + unaligned-trap ============== diff --git a/Documentation/arch/ia64/aliasing.rst b/Documentation/arch/ia64/aliasing.rst new file mode 100644 index 000000000000..36a1e1d4842b --- /dev/null +++ b/Documentation/arch/ia64/aliasing.rst @@ -0,0 +1,246 @@ +================================== +Memory Attribute Aliasing on IA-64 +================================== + +Bjorn Helgaas + +May 4, 2006 + + +Memory Attributes +================= + + Itanium supports several attributes for virtual memory references. + The attribute is part of the virtual translation, i.e., it is + contained in the TLB entry. The ones of most interest to the Linux + kernel are: + + == ====================== + WB Write-back (cacheable) + UC Uncacheable + WC Write-coalescing + == ====================== + + System memory typically uses the WB attribute. The UC attribute is + used for memory-mapped I/O devices. The WC attribute is uncacheable + like UC is, but writes may be delayed and combined to increase + performance for things like frame buffers. + + The Itanium architecture requires that we avoid accessing the same + page with both a cacheable mapping and an uncacheable mapping[1]. + + The design of the chipset determines which attributes are supported + on which regions of the address space. For example, some chipsets + support either WB or UC access to main memory, while others support + only WB access. + +Memory Map +========== + + Platform firmware describes the physical memory map and the + supported attributes for each region. At boot-time, the kernel uses + the EFI GetMemoryMap() interface. ACPI can also describe memory + devices and the attributes they support, but Linux/ia64 currently + doesn't use this information. + + The kernel uses the efi_memmap table returned from GetMemoryMap() to + learn the attributes supported by each region of physical address + space. Unfortunately, this table does not completely describe the + address space because some machines omit some or all of the MMIO + regions from the map. + + The kernel maintains another table, kern_memmap, which describes the + memory Linux is actually using and the attribute for each region. + This contains only system memory; it does not contain MMIO space. + + The kern_memmap table typically contains only a subset of the system + memory described by the efi_memmap. Linux/ia64 can't use all memory + in the system because of constraints imposed by the identity mapping + scheme. + + The efi_memmap table is preserved unmodified because the original + boot-time information is required for kexec. + +Kernel Identity Mappings +======================== + + Linux/ia64 identity mappings are done with large pages, currently + either 16MB or 64MB, referred to as "granules." Cacheable mappings + are speculative[2], so the processor can read any location in the + page at any time, independent of the programmer's intentions. This + means that to avoid attribute aliasing, Linux can create a cacheable + identity mapping only when the entire granule supports cacheable + access. + + Therefore, kern_memmap contains only full granule-sized regions that + can referenced safely by an identity mapping. + + Uncacheable mappings are not speculative, so the processor will + generate UC accesses only to locations explicitly referenced by + software. This allows UC identity mappings to cover granules that + are only partially populated, or populated with a combination of UC + and WB regions. + +User Mappings +============= + + User mappings are typically done with 16K or 64K pages. The smaller + page size allows more flexibility because only 16K or 64K has to be + homogeneous with respect to memory attributes. + +Potential Attribute Aliasing Cases +================================== + + There are several ways the kernel creates new mappings: + +mmap of /dev/mem +---------------- + + This uses remap_pfn_range(), which creates user mappings. These + mappings may be either WB or UC. If the region being mapped + happens to be in kern_memmap, meaning that it may also be mapped + by a kernel identity mapping, the user mapping must use the same + attribute as the kernel mapping. + + If the region is not in kern_memmap, the user mapping should use + an attribute reported as being supported in the EFI memory map. + + Since the EFI memory map does not describe MMIO on some + machines, this should use an uncacheable mapping as a fallback. + +mmap of /sys/class/pci_bus/.../legacy_mem +----------------------------------------- + + This is very similar to mmap of /dev/mem, except that legacy_mem + only allows mmap of the one megabyte "legacy MMIO" area for a + specific PCI bus. Typically this is the first megabyte of + physical address space, but it may be different on machines with + several VGA devices. + + "X" uses this to access VGA frame buffers. Using legacy_mem + rather than /dev/mem allows multiple instances of X to talk to + different VGA cards. + + The /dev/mem mmap constraints apply. + +mmap of /proc/bus/pci/.../??.? +------------------------------ + + This is an MMIO mmap of PCI functions, which additionally may or + may not be requested as using the WC attribute. + + If WC is requested, and the region in kern_memmap is either WC + or UC, and the EFI memory map designates the region as WC, then + the WC mapping is allowed. + + Otherwise, the user mapping must use the same attribute as the + kernel mapping. + +read/write of /dev/mem +---------------------- + + This uses copy_from_user(), which implicitly uses a kernel + identity mapping. This is obviously safe for things in + kern_memmap. + + There may be corner cases of things that are not in kern_memmap, + but could be accessed this way. For example, registers in MMIO + space are not in kern_memmap, but could be accessed with a UC + mapping. This would not cause attribute aliasing. But + registers typically can be accessed only with four-byte or + eight-byte accesses, and the copy_from_user() path doesn't allow + any control over the access size, so this would be dangerous. + +ioremap() +--------- + + This returns a mapping for use inside the kernel. + + If the region is in kern_memmap, we should use the attribute + specified there. + + If the EFI memory map reports that the entire granule supports + WB, we should use that (granules that are partially reserved + or occupied by firmware do not appear in kern_memmap). + + If the granule contains non-WB memory, but we can cover the + region safely with kernel page table mappings, we can use + ioremap_page_range() as most other architectures do. + + Failing all of the above, we have to fall back to a UC mapping. + +Past Problem Cases +================== + +mmap of various MMIO regions from /dev/mem by "X" on Intel platforms +-------------------------------------------------------------------- + + The EFI memory map may not report these MMIO regions. + + These must be allowed so that X will work. This means that + when the EFI memory map is incomplete, every /dev/mem mmap must + succeed. It may create either WB or UC user mappings, depending + on whether the region is in kern_memmap or the EFI memory map. + +mmap of 0x0-0x9FFFF /dev/mem by "hwinfo" on HP sx1000 with VGA enabled +---------------------------------------------------------------------- + + The EFI memory map reports the following attributes: + + =============== ======= ================== + 0x00000-0x9FFFF WB only + 0xA0000-0xBFFFF UC only (VGA frame buffer) + 0xC0000-0xFFFFF WB only + =============== ======= ================== + + This mmap is done with user pages, not kernel identity mappings, + so it is safe to use WB mappings. + + The kernel VGA driver may ioremap the VGA frame buffer at 0xA0000, + which uses a granule-sized UC mapping. This granule will cover some + WB-only memory, but since UC is non-speculative, the processor will + never generate an uncacheable reference to the WB-only areas unless + the driver explicitly touches them. + +mmap of 0x0-0xFFFFF legacy_mem by "X" +------------------------------------- + + If the EFI memory map reports that the entire range supports the + same attributes, we can allow the mmap (and we will prefer WB if + supported, as is the case with HP sx[12]000 machines with VGA + disabled). + + If EFI reports the range as partly WB and partly UC (as on sx[12]000 + machines with VGA enabled), we must fail the mmap because there's no + safe attribute to use. + + If EFI reports some of the range but not all (as on Intel firmware + that doesn't report the VGA frame buffer at all), we should fail the + mmap and force the user to map just the specific region of interest. + +mmap of 0xA0000-0xBFFFF legacy_mem by "X" on HP sx1000 with VGA disabled +------------------------------------------------------------------------ + + The EFI memory map reports the following attributes:: + + 0x00000-0xFFFFF WB only (no VGA MMIO hole) + + This is a special case of the previous case, and the mmap should + fail for the same reason as above. + +read of /sys/devices/.../rom +---------------------------- + + For VGA devices, this may cause an ioremap() of 0xC0000. This + used to be done with a UC mapping, because the VGA frame buffer + at 0xA0000 prevents use of a WB granule. The UC mapping causes + an MCA on HP sx[12]000 chipsets. + + We should use WB page table mappings to avoid covering the VGA + frame buffer. + +Notes +===== + + [1] SDM rev 2.2, vol 2, sec 4.4.1. + [2] SDM rev 2.2, vol 2, sec 4.4.6. diff --git a/Documentation/arch/ia64/efirtc.rst b/Documentation/arch/ia64/efirtc.rst new file mode 100644 index 000000000000..fd8328408301 --- /dev/null +++ b/Documentation/arch/ia64/efirtc.rst @@ -0,0 +1,144 @@ +========================== +EFI Real Time Clock driver +========================== + +S. Eranian + +March 2000 + +1. Introduction +=============== + +This document describes the efirtc.c driver has provided for +the IA-64 platform. + +The purpose of this driver is to supply an API for kernel and user applications +to get access to the Time Service offered by EFI version 0.92. + +EFI provides 4 calls one can make once the OS is booted: GetTime(), +SetTime(), GetWakeupTime(), SetWakeupTime() which are all supported by this +driver. We describe those calls as well the design of the driver in the +following sections. + +2. Design Decisions +=================== + +The original ideas was to provide a very simple driver to get access to, +at first, the time of day service. This is required in order to access, in a +portable way, the CMOS clock. A program like /sbin/hwclock uses such a clock +to initialize the system view of the time during boot. + +Because we wanted to minimize the impact on existing user-level apps using +the CMOS clock, we decided to expose an API that was very similar to the one +used today with the legacy RTC driver (driver/char/rtc.c). However, because +EFI provides a simpler services, not all ioctl() are available. Also +new ioctl()s have been introduced for things that EFI provides but not the +legacy. + +EFI uses a slightly different way of representing the time, noticeably +the reference date is different. Year is the using the full 4-digit format. +The Epoch is January 1st 1998. For backward compatibility reasons we don't +expose this new way of representing time. Instead we use something very +similar to the struct tm, i.e. struct rtc_time, as used by hwclock. +One of the reasons for doing it this way is to allow for EFI to still evolve +without necessarily impacting any of the user applications. The decoupling +enables flexibility and permits writing wrapper code is ncase things change. + +The driver exposes two interfaces, one via the device file and a set of +ioctl()s. The other is read-only via the /proc filesystem. + +As of today we don't offer a /proc/sys interface. + +To allow for a uniform interface between the legacy RTC and EFI time service, +we have created the include/linux/rtc.h header file to contain only the +"public" API of the two drivers. The specifics of the legacy RTC are still +in include/linux/mc146818rtc.h. + + +3. Time of day service +====================== + +The part of the driver gives access to the time of day service of EFI. +Two ioctl()s, compatible with the legacy RTC calls: + + Read the CMOS clock:: + + ioctl(d, RTC_RD_TIME, &rtc); + + Write the CMOS clock:: + + ioctl(d, RTC_SET_TIME, &rtc); + +The rtc is a pointer to a data structure defined in rtc.h which is close +to a struct tm:: + + struct rtc_time { + int tm_sec; + int tm_min; + int tm_hour; + int tm_mday; + int tm_mon; + int tm_year; + int tm_wday; + int tm_yday; + int tm_isdst; + }; + +The driver takes care of converting back an forth between the EFI time and +this format. + +Those two ioctl()s can be exercised with the hwclock command: + +For reading:: + + # /sbin/hwclock --show + Mon Mar 6 15:32:32 2000 -0.910248 seconds + +For setting:: + + # /sbin/hwclock --systohc + +Root privileges are required to be able to set the time of day. + +4. Wakeup Alarm service +======================= + +EFI provides an API by which one can program when a machine should wakeup, +i.e. reboot. This is very different from the alarm provided by the legacy +RTC which is some kind of interval timer alarm. For this reason we don't use +the same ioctl()s to get access to the service. Instead we have +introduced 2 news ioctl()s to the interface of an RTC. + +We have added 2 new ioctl()s that are specific to the EFI driver: + + Read the current state of the alarm:: + + ioctl(d, RTC_WKALM_RD, &wkt) + + Set the alarm or change its status:: + + ioctl(d, RTC_WKALM_SET, &wkt) + +The wkt structure encapsulates a struct rtc_time + 2 extra fields to get +status information:: + + struct rtc_wkalrm { + + unsigned char enabled; /* =1 if alarm is enabled */ + unsigned char pending; /* =1 if alarm is pending */ + + struct rtc_time time; + } + +As of today, none of the existing user-level apps supports this feature. +However writing such a program should be hard by simply using those two +ioctl(). + +Root privileges are required to be able to set the alarm. + +5. References +============= + +Checkout the following Web site for more information on EFI: + +http://developer.intel.com/technology/efi/ diff --git a/Documentation/arch/ia64/err_inject.rst b/Documentation/arch/ia64/err_inject.rst new file mode 100644 index 000000000000..900f71e93a29 --- /dev/null +++ b/Documentation/arch/ia64/err_inject.rst @@ -0,0 +1,1067 @@ +======================================== +IPF Machine Check (MC) error inject tool +======================================== + +IPF Machine Check (MC) error inject tool is used to inject MC +errors from Linux. The tool is a test bed for IPF MC work flow including +hardware correctable error handling, OS recoverable error handling, MC +event logging, etc. + +The tool includes two parts: a kernel driver and a user application +sample. The driver provides interface to PAL to inject error +and query error injection capabilities. The driver code is in +arch/ia64/kernel/err_inject.c. The application sample (shown below) +provides a combination of various errors and calls the driver's interface +(sysfs interface) to inject errors or query error injection capabilities. + +The tool can be used to test Intel IPF machine MC handling capabilities. +It's especially useful for people who can not access hardware MC injection +tool to inject error. It's also very useful to integrate with other +software test suits to do stressful testing on IPF. + +Below is a sample application as part of the whole tool. The sample +can be used as a working test tool. Or it can be expanded to include +more features. It also can be a integrated into a library or other user +application to have more thorough test. + +The sample application takes err.conf as error configuration input. GCC +compiles the code. After you install err_inject driver, you can run +this sample application to inject errors. + +Errata: Itanium 2 Processors Specification Update lists some errata against +the pal_mc_error_inject PAL procedure. The following err.conf has been tested +on latest Montecito PAL. + +err.conf:: + + #This is configuration file for err_inject_tool. + #The format of the each line is: + #cpu, loop, interval, err_type_info, err_struct_info, err_data_buffer + #where + # cpu: logical cpu number the error will be inject in. + # loop: times the error will be injected. + # interval: In second. every so often one error is injected. + # err_type_info, err_struct_info: PAL parameters. + # + #Note: All values are hex w/o or w/ 0x prefix. + + + #On cpu2, inject only total 0x10 errors, interval 5 seconds + #corrected, data cache, hier-2, physical addr(assigned by tool code). + #working on Montecito latest PAL. + 2, 10, 5, 4101, 95 + + #On cpu4, inject and consume total 0x10 errors, interval 5 seconds + #corrected, data cache, hier-2, physical addr(assigned by tool code). + #working on Montecito latest PAL. + 4, 10, 5, 4109, 95 + + #On cpu15, inject and consume total 0x10 errors, interval 5 seconds + #recoverable, DTR0, hier-2. + #working on Montecito latest PAL. + 0xf, 0x10, 5, 4249, 15 + +The sample application source code: + +err_injection_tool.c:: + + /* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Copyright (C) 2006 Intel Co + * Fenghua Yu + * + */ + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + #include + + #define MAX_FN_SIZE 256 + #define MAX_BUF_SIZE 256 + #define DATA_BUF_SIZE 256 + #define NR_CPUS 512 + #define MAX_TASK_NUM 2048 + #define MIN_INTERVAL 5 // seconds + #define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte. + #define PARA_FIELD_NUM 5 + #define MASK_SIZE (NR_CPUS/64) + #define PATH_FORMAT "/sys/devices/system/cpu/cpu%d/err_inject/" + + int sched_setaffinity(pid_t pid, unsigned int len, unsigned long *mask); + + int verbose; + #define vbprintf if (verbose) printf + + int log_info(int cpu, const char *fmt, ...) + { + FILE *log; + char fn[MAX_FN_SIZE]; + char buf[MAX_BUF_SIZE]; + va_list args; + + sprintf(fn, "%d.log", cpu); + log=fopen(fn, "a+"); + if (log==NULL) { + perror("Error open:"); + return -1; + } + + va_start(args, fmt); + vprintf(fmt, args); + memset(buf, 0, MAX_BUF_SIZE); + vsprintf(buf, fmt, args); + va_end(args); + + fwrite(buf, sizeof(buf), 1, log); + fclose(log); + + return 0; + } + + typedef unsigned long u64; + typedef unsigned int u32; + + typedef union err_type_info_u { + struct { + u64 mode : 3, /* 0-2 */ + err_inj : 3, /* 3-5 */ + err_sev : 2, /* 6-7 */ + err_struct : 5, /* 8-12 */ + struct_hier : 3, /* 13-15 */ + reserved : 48; /* 16-63 */ + } err_type_info_u; + u64 err_type_info; + } err_type_info_t; + + typedef union err_struct_info_u { + struct { + u64 siv : 1, /* 0 */ + c_t : 2, /* 1-2 */ + cl_p : 3, /* 3-5 */ + cl_id : 3, /* 6-8 */ + cl_dp : 1, /* 9 */ + reserved1 : 22, /* 10-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_cache; + struct { + u64 siv : 1, /* 0 */ + tt : 2, /* 1-2 */ + tc_tr : 2, /* 3-4 */ + tr_slot : 8, /* 5-12 */ + reserved1 : 19, /* 13-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_tlb; + struct { + u64 siv : 1, /* 0 */ + regfile_id : 4, /* 1-4 */ + reg_num : 7, /* 5-11 */ + reserved1 : 20, /* 12-31 */ + tiv : 1, /* 32 */ + trigger : 4, /* 33-36 */ + trigger_pl : 3, /* 37-39 */ + reserved2 : 24; /* 40-63 */ + } err_struct_info_register; + struct { + u64 reserved; + } err_struct_info_bus_processor_interconnect; + u64 err_struct_info; + } err_struct_info_t; + + typedef union err_data_buffer_u { + struct { + u64 trigger_addr; /* 0-63 */ + u64 inj_addr; /* 64-127 */ + u64 way : 5, /* 128-132 */ + index : 20, /* 133-152 */ + : 39; /* 153-191 */ + } err_data_buffer_cache; + struct { + u64 trigger_addr; /* 0-63 */ + u64 inj_addr; /* 64-127 */ + u64 way : 5, /* 128-132 */ + index : 20, /* 133-152 */ + reserved : 39; /* 153-191 */ + } err_data_buffer_tlb; + struct { + u64 trigger_addr; /* 0-63 */ + } err_data_buffer_register; + struct { + u64 reserved; /* 0-63 */ + } err_data_buffer_bus_processor_interconnect; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; + } err_data_buffer_t; + + typedef union capabilities_u { + struct { + u64 i : 1, + d : 1, + rv : 1, + tag : 1, + data : 1, + mesi : 1, + dp : 1, + reserved1 : 3, + pa : 1, + va : 1, + wi : 1, + reserved2 : 20, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_cache; + struct { + u64 d : 1, + i : 1, + rv : 1, + tc : 1, + tr : 1, + reserved1 : 27, + trigger : 1, + trigger_pl : 1, + reserved2 : 30; + } capabilities_tlb; + struct { + u64 gr_b0 : 1, + gr_b1 : 1, + fr : 1, + br : 1, + pr : 1, + ar : 1, + cr : 1, + rr : 1, + pkr : 1, + dbr : 1, + ibr : 1, + pmc : 1, + pmd : 1, + reserved1 : 3, + regnum : 1, + reserved2 : 15, + trigger : 1, + trigger_pl : 1, + reserved3 : 30; + } capabilities_register; + struct { + u64 reserved; + } capabilities_bus_processor_interconnect; + } capabilities_t; + + typedef struct resources_s { + u64 ibr0 : 1, + ibr2 : 1, + ibr4 : 1, + ibr6 : 1, + dbr0 : 1, + dbr2 : 1, + dbr4 : 1, + dbr6 : 1, + reserved : 48; + } resources_t; + + + long get_page_size(void) + { + long page_size=sysconf(_SC_PAGESIZE); + return page_size; + } + + #define PAGE_SIZE (get_page_size()==-1?0x4000:get_page_size()) + #define SHM_SIZE (2*PAGE_SIZE*NR_CPUS) + #define SHM_VA 0x2000000100000000 + + int shmid; + void *shmaddr; + + int create_shm(void) + { + key_t key; + char fn[MAX_FN_SIZE]; + + /* cpu0 is always existing */ + sprintf(fn, PATH_FORMAT, 0); + if ((key = ftok(fn, 's')) == -1) { + perror("ftok"); + return -1; + } + + shmid = shmget(key, SHM_SIZE, 0644 | IPC_CREAT); + if (shmid == -1) { + if (errno==EEXIST) { + shmid = shmget(key, SHM_SIZE, 0); + if (shmid == -1) { + perror("shmget"); + return -1; + } + } + else { + perror("shmget"); + return -1; + } + } + vbprintf("shmid=%d", shmid); + + /* connect to the segment: */ + shmaddr = shmat(shmid, (void *)SHM_VA, 0); + if (shmaddr == (void*)-1) { + perror("shmat"); + return -1; + } + + memset(shmaddr, 0, SHM_SIZE); + mlock(shmaddr, SHM_SIZE); + + return 0; + } + + int free_shm() + { + munlock(shmaddr, SHM_SIZE); + shmdt(shmaddr); + semctl(shmid, 0, IPC_RMID); + + return 0; + } + + #ifdef _SEM_SEMUN_UNDEFINED + union semun + { + int val; + struct semid_ds *buf; + unsigned short int *array; + struct seminfo *__buf; + }; + #endif + + u32 mode=1; /* 1: physical mode; 2: virtual mode. */ + int one_lock=1; + key_t key[NR_CPUS]; + int semid[NR_CPUS]; + + int create_sem(int cpu) + { + union semun arg; + char fn[MAX_FN_SIZE]; + int sid; + + sprintf(fn, PATH_FORMAT, cpu); + sprintf(fn, "%s/%s", fn, "err_type_info"); + if ((key[cpu] = ftok(fn, 'e')) == -1) { + perror("ftok"); + return -1; + } + + if (semid[cpu]!=0) + return 0; + + /* clear old semaphore */ + if ((sid = semget(key[cpu], 1, 0)) != -1) + semctl(sid, 0, IPC_RMID); + + /* get one semaphore */ + if ((semid[cpu] = semget(key[cpu], 1, IPC_CREAT | IPC_EXCL)) == -1) { + perror("semget"); + printf("Please remove semaphore with key=0x%lx, then run the tool.\n", + (u64)key[cpu]); + return -1; + } + + vbprintf("semid[%d]=0x%lx, key[%d]=%lx\n",cpu,(u64)semid[cpu],cpu, + (u64)key[cpu]); + /* initialize the semaphore to 1: */ + arg.val = 1; + if (semctl(semid[cpu], 0, SETVAL, arg) == -1) { + perror("semctl"); + return -1; + } + + return 0; + } + + static int lock(int cpu) + { + struct sembuf lock; + + lock.sem_num = cpu; + lock.sem_op = 1; + semop(semid[cpu], &lock, 1); + + return 0; + } + + static int unlock(int cpu) + { + struct sembuf unlock; + + unlock.sem_num = cpu; + unlock.sem_op = -1; + semop(semid[cpu], &unlock, 1); + + return 0; + } + + void free_sem(int cpu) + { + semctl(semid[cpu], 0, IPC_RMID); + } + + int wr_multi(char *fn, unsigned long *data, int size) + { + int fd; + char buf[MAX_BUF_SIZE]; + int ret; + + if (size==1) + sprintf(buf, "%lx", *data); + else if (size==3) + sprintf(buf, "%lx,%lx,%lx", data[0], data[1], data[2]); + else { + fprintf(stderr,"write to file with wrong size!\n"); + return -1; + } + + fd=open(fn, O_RDWR); + if (!fd) { + perror("Error:"); + return -1; + } + ret=write(fd, buf, sizeof(buf)); + close(fd); + return ret; + } + + int wr(char *fn, unsigned long data) + { + return wr_multi(fn, &data, 1); + } + + int rd(char *fn, unsigned long *data) + { + int fd; + char buf[MAX_BUF_SIZE]; + + fd=open(fn, O_RDONLY); + if (fd<0) { + perror("Error:"); + return -1; + } + read(fd, buf, MAX_BUF_SIZE); + *data=strtoul(buf, NULL, 16); + close(fd); + return 0; + } + + int rd_status(char *path, int *status) + { + char fn[MAX_FN_SIZE]; + sprintf(fn, "%s/status", path); + if (rd(fn, (u64*)status)<0) { + perror("status reading error.\n"); + return -1; + } + + return 0; + } + + int rd_capabilities(char *path, u64 *capabilities) + { + char fn[MAX_FN_SIZE]; + sprintf(fn, "%s/capabilities", path); + if (rd(fn, capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + + return 0; + } + + int rd_all(char *path) + { + unsigned long err_type_info, err_struct_info, err_data_buffer; + int status; + unsigned long capabilities, resources; + char fn[MAX_FN_SIZE]; + + sprintf(fn, "%s/err_type_info", path); + if (rd(fn, &err_type_info)<0) { + perror("err_type_info reading error.\n"); + return -1; + } + printf("err_type_info=%lx\n", err_type_info); + + sprintf(fn, "%s/err_struct_info", path); + if (rd(fn, &err_struct_info)<0) { + perror("err_struct_info reading error.\n"); + return -1; + } + printf("err_struct_info=%lx\n", err_struct_info); + + sprintf(fn, "%s/err_data_buffer", path); + if (rd(fn, &err_data_buffer)<0) { + perror("err_data_buffer reading error.\n"); + return -1; + } + printf("err_data_buffer=%lx\n", err_data_buffer); + + sprintf(fn, "%s/status", path); + if (rd("status", (u64*)&status)<0) { + perror("status reading error.\n"); + return -1; + } + printf("status=%d\n", status); + + sprintf(fn, "%s/capabilities", path); + if (rd(fn,&capabilities)<0) { + perror("capabilities reading error.\n"); + return -1; + } + printf("capabilities=%lx\n", capabilities); + + sprintf(fn, "%s/resources", path); + if (rd(fn, &resources)<0) { + perror("resources reading error.\n"); + return -1; + } + printf("resources=%lx\n", resources); + + return 0; + } + + int query_capabilities(char *path, err_type_info_t err_type_info, + u64 *capabilities) + { + char fn[MAX_FN_SIZE]; + err_struct_info_t err_struct_info; + err_data_buffer_t err_data_buffer; + + err_struct_info.err_struct_info=0; + memset(err_data_buffer.err_data_buffer, -1, ERR_DATA_BUFFER_SIZE*8); + + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, 0x0); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn, mode); + + if (rd_capabilities(path, capabilities)<0) + return -1; + + return 0; + } + + int query_all_capabilities() + { + int status; + err_type_info_t err_type_info; + int err_sev, err_struct, struct_hier; + int cap=0; + u64 capabilities; + char path[MAX_FN_SIZE]; + + err_type_info.err_type_info=0; // Initial + err_type_info.err_type_info_u.mode=0; // Query mode; + err_type_info.err_type_info_u.err_inj=0; + + printf("All capabilities implemented in pal_mc_error_inject:\n"); + sprintf(path, PATH_FORMAT ,0); + for (err_sev=0;err_sev<3;err_sev++) + for (err_struct=0;err_struct<5;err_struct++) + for (struct_hier=0;struct_hier<5;struct_hier++) + { + status=-1; + capabilities=0; + err_type_info.err_type_info_u.err_sev=err_sev; + err_type_info.err_type_info_u.err_struct=err_struct; + err_type_info.err_type_info_u.struct_hier=struct_hier; + + if (query_capabilities(path, err_type_info, &capabilities)<0) + continue; + + if (rd_status(path, &status)<0) + continue; + + if (status==0) { + cap=1; + printf("For err_sev=%d, err_struct=%d, struct_hier=%d: ", + err_sev, err_struct, struct_hier); + printf("capabilities 0x%lx\n", capabilities); + } + } + if (!cap) { + printf("No capabilities supported.\n"); + return 0; + } + + return 0; + } + + int err_inject(int cpu, char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t err_data_buffer) + { + int status; + char fn[MAX_FN_SIZE]; + + log_info(cpu, "err_type_info=%lx, err_struct_info=%lx, ", + err_type_info.err_type_info, + err_struct_info.err_struct_info); + log_info(cpu,"err_data_buffer=[%lx,%lx,%lx]\n", + err_data_buffer.err_data_buffer[0], + err_data_buffer.err_data_buffer[1], + err_data_buffer.err_data_buffer[2]); + sprintf(fn, "%s/err_type_info", path); + wr(fn, err_type_info.err_type_info); + sprintf(fn, "%s/err_struct_info", path); + wr(fn, err_struct_info.err_struct_info); + sprintf(fn, "%s/err_data_buffer", path); + wr_multi(fn, err_data_buffer.err_data_buffer, ERR_DATA_BUFFER_SIZE); + + // Fire pal_mc_error_inject procedure. + sprintf(fn, "%s/call_start", path); + wr(fn,mode); + + if (rd_status(path, &status)<0) { + vbprintf("fail: read status\n"); + return -100; + } + + if (status!=0) { + log_info(cpu, "fail: status=%d\n", status); + return status; + } + + return status; + } + + static int construct_data_buf(char *path, err_type_info_t err_type_info, + err_struct_info_t err_struct_info, + err_data_buffer_t *err_data_buffer, + void *va1) + { + char fn[MAX_FN_SIZE]; + u64 virt_addr=0, phys_addr=0; + + vbprintf("va1=%lx\n", (u64)va1); + memset(&err_data_buffer->err_data_buffer_cache, 0, ERR_DATA_BUFFER_SIZE*8); + + switch (err_type_info.err_type_info_u.err_struct) { + case 1: // Cache + switch (err_struct_info.err_struct_info_cache.cl_id) { + case 1: //Virtual addr + err_data_buffer->err_data_buffer_cache.inj_addr=(u64)va1; + break; + case 2: //Phys addr + sprintf(fn, "%s/virtual_to_phys", path); + virt_addr=(u64)va1; + if (wr(fn,virt_addr)<0) + return -1; + rd(fn, &phys_addr); + err_data_buffer->err_data_buffer_cache.inj_addr=phys_addr; + break; + default: + printf("Not supported cl_id\n"); + break; + } + break; + case 2: // TLB + break; + case 3: // Register file + break; + case 4: // Bus/system interconnect + default: + printf("Not supported err_struct\n"); + break; + } + + return 0; + } + + typedef struct { + u64 cpu; + u64 loop; + u64 interval; + u64 err_type_info; + u64 err_struct_info; + u64 err_data_buffer[ERR_DATA_BUFFER_SIZE]; + } parameters_t; + + parameters_t line_para; + int para; + + static int empty_data_buffer(u64 *err_data_buffer) + { + int empty=1; + int i; + + for (i=0;iMIN_INTERVAL + ?interval:MIN_INTERVAL; + parameters[num].err_type_info=err_type_info_conf; + parameters[num].err_struct_info=err_struct_info_conf; + memcpy(parameters[num++].err_data_buffer, + err_data_buffer_conf,ERR_DATA_BUFFER_SIZE*8) ; + + if (num>=MAX_TASK_NUM) + break; + } + } + else { + parameters[0].cpu=line_para.cpu; + parameters[0].loop=line_para.loop; + parameters[0].interval= line_para.interval>MIN_INTERVAL + ?line_para.interval:MIN_INTERVAL; + parameters[0].err_type_info=line_para.err_type_info; + parameters[0].err_struct_info=line_para.err_struct_info; + memcpy(parameters[0].err_data_buffer, + line_para.err_data_buffer,ERR_DATA_BUFFER_SIZE*8) ; + + num=1; + } + + /* Create semaphore: If one_lock, one semaphore for all processors. + Otherwise, one semaphore for each processor. */ + if (one_lock) { + if (create_sem(0)) { + printf("Can not create semaphore...exit\n"); + free_sem(0); + return -1; + } + } + else { + for (i=0;i + +Using the "epc" instruction effectively introduces a new mode of +execution to the ia64 linux kernel. We call this mode the +"fsys-mode". To recap, the normal states of execution are: + + - kernel mode: + Both the register stack and the memory stack have been + switched over to kernel memory. The user-level state is saved + in a pt-regs structure at the top of the kernel memory stack. + + - user mode: + Both the register stack and the kernel stack are in + user memory. The user-level state is contained in the + CPU registers. + + - bank 0 interruption-handling mode: + This is the non-interruptible state which all + interruption-handlers start execution in. The user-level + state remains in the CPU registers and some kernel state may + be stored in bank 0 of registers r16-r31. + +In contrast, fsys-mode has the following special properties: + + - execution is at privilege level 0 (most-privileged) + + - CPU registers may contain a mixture of user-level and kernel-level + state (it is the responsibility of the kernel to ensure that no + security-sensitive kernel-level state is leaked back to + user-level) + + - execution is interruptible and preemptible (an fsys-mode handler + can disable interrupts and avoid all other interruption-sources + to avoid preemption) + + - neither the memory-stack nor the register-stack can be trusted while + in fsys-mode (they point to the user-level stacks, which may + be invalid, or completely bogus addresses) + +In summary, fsys-mode is much more similar to running in user-mode +than it is to running in kernel-mode. Of course, given that the +privilege level is at level 0, this means that fsys-mode requires some +care (see below). + + +How to tell fsys-mode +===================== + +Linux operates in fsys-mode when (a) the privilege level is 0 (most +privileged) and (b) the stacks have NOT been switched to kernel memory +yet. For convenience, the header file provides +three macros:: + + user_mode(regs) + user_stack(task,regs) + fsys_mode(task,regs) + +The "regs" argument is a pointer to a pt_regs structure. The "task" +argument is a pointer to the task structure to which the "regs" +pointer belongs to. user_mode() returns TRUE if the CPU state pointed +to by "regs" was executing in user mode (privilege level 3). +user_stack() returns TRUE if the state pointed to by "regs" was +executing on the user-level stack(s). Finally, fsys_mode() returns +TRUE if the CPU state pointed to by "regs" was executing in fsys-mode. +The fsys_mode() macro is equivalent to the expression:: + + !user_mode(regs) && user_stack(task,regs) + +How to write an fsyscall handler +================================ + +The file arch/ia64/kernel/fsys.S contains a table of fsyscall-handlers +(fsyscall_table). This table contains one entry for each system call. +By default, a system call is handled by fsys_fallback_syscall(). This +routine takes care of entering (full) kernel mode and calling the +normal Linux system call handler. For performance-critical system +calls, it is possible to write a hand-tuned fsyscall_handler. For +example, fsys.S contains fsys_getpid(), which is a hand-tuned version +of the getpid() system call. + +The entry and exit-state of an fsyscall handler is as follows: + +Machine state on entry to fsyscall handler +------------------------------------------ + + ========= =============================================================== + r10 0 + r11 saved ar.pfs (a user-level value) + r15 system call number + r16 "current" task pointer (in normal kernel-mode, this is in r13) + r32-r39 system call arguments + b6 return address (a user-level value) + ar.pfs previous frame-state (a user-level value) + PSR.be cleared to zero (i.e., little-endian byte order is in effect) + - all other registers may contain values passed in from user-mode + ========= =============================================================== + +Required machine state on exit to fsyscall handler +-------------------------------------------------- + + ========= =========================================================== + r11 saved ar.pfs (as passed into the fsyscall handler) + r15 system call number (as passed into the fsyscall handler) + r32-r39 system call arguments (as passed into the fsyscall handler) + b6 return address (as passed into the fsyscall handler) + ar.pfs previous frame-state (as passed into the fsyscall handler) + ========= =========================================================== + +Fsyscall handlers can execute with very little overhead, but with that +speed comes a set of restrictions: + + * Fsyscall-handlers MUST check for any pending work in the flags + member of the thread-info structure and if any of the + TIF_ALLWORK_MASK flags are set, the handler needs to fall back on + doing a full system call (by calling fsys_fallback_syscall). + + * Fsyscall-handlers MUST preserve incoming arguments (r32-r39, r11, + r15, b6, and ar.pfs) because they will be needed in case of a + system call restart. Of course, all "preserved" registers also + must be preserved, in accordance to the normal calling conventions. + + * Fsyscall-handlers MUST check argument registers for containing a + NaT value before using them in any way that could trigger a + NaT-consumption fault. If a system call argument is found to + contain a NaT value, an fsyscall-handler may return immediately + with r8=EINVAL, r10=-1. + + * Fsyscall-handlers MUST NOT use the "alloc" instruction or perform + any other operation that would trigger mandatory RSE + (register-stack engine) traffic. + + * Fsyscall-handlers MUST NOT write to any stacked registers because + it is not safe to assume that user-level called a handler with the + proper number of arguments. + + * Fsyscall-handlers need to be careful when accessing per-CPU variables: + unless proper safe-guards are taken (e.g., interruptions are avoided), + execution may be pre-empted and resumed on another CPU at any given + time. + + * Fsyscall-handlers must be careful not to leak sensitive kernel' + information back to user-level. In particular, before returning to + user-level, care needs to be taken to clear any scratch registers + that could contain sensitive information (note that the current + task pointer is not considered sensitive: it's already exposed + through ar.k6). + + * Fsyscall-handlers MUST NOT access user-memory without first + validating access-permission (this can be done typically via + probe.r.fault and/or probe.w.fault) and without guarding against + memory access exceptions (this can be done with the EX() macros + defined by asmmacro.h). + +The above restrictions may seem draconian, but remember that it's +possible to trade off some of the restrictions by paying a slightly +higher overhead. For example, if an fsyscall-handler could benefit +from the shadow register bank, it could temporarily disable PSR.i and +PSR.ic, switch to bank 0 (bsw.0) and then use the shadow registers as +needed. In other words, following the above rules yields extremely +fast system call execution (while fully preserving system call +semantics), but there is also a lot of flexibility in handling more +complicated cases. + +Signal handling +=============== + +The delivery of (asynchronous) signals must be delayed until fsys-mode +is exited. This is accomplished with the help of the lower-privilege +transfer trap: arch/ia64/kernel/process.c:do_notify_resume_user() +checks whether the interrupted task was in fsys-mode and, if so, sets +PSR.lp and returns immediately. When fsys-mode is exited via the +"br.ret" instruction that lowers the privilege level, a trap will +occur. The trap handler clears PSR.lp again and returns immediately. +The kernel exit path then checks for and delivers any pending signals. + +PSR Handling +============ + +The "epc" instruction doesn't change the contents of PSR at all. This +is in contrast to a regular interruption, which clears almost all +bits. Because of that, some care needs to be taken to ensure things +work as expected. The following discussion describes how each PSR bit +is handled. + +======= ======================================================================= +PSR.be Cleared when entering fsys-mode. A srlz.d instruction is used + to ensure the CPU is in little-endian mode before the first + load/store instruction is executed. PSR.be is normally NOT + restored upon return from an fsys-mode handler. In other + words, user-level code must not rely on PSR.be being preserved + across a system call. +PSR.up Unchanged. +PSR.ac Unchanged. +PSR.mfl Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.mfh Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.ic Unchanged. Note: fsys-mode handlers can clear the bit, if needed. +PSR.i Unchanged. Note: fsys-mode handlers can clear the bit, if needed. +PSR.pk Unchanged. +PSR.dt Unchanged. +PSR.dfl Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.dfh Unchanged. Note: fsys-mode handlers must not write-registers! +PSR.sp Unchanged. +PSR.pp Unchanged. +PSR.di Unchanged. +PSR.si Unchanged. +PSR.db Unchanged. The kernel prevents user-level from setting a hardware + breakpoint that triggers at any privilege level other than + 3 (user-mode). +PSR.lp Unchanged. +PSR.tb Lazy redirect. If a taken-branch trap occurs while in + fsys-mode, the trap-handler modifies the saved machine state + such that execution resumes in the gate page at + syscall_via_break(), with privilege level 3. Note: the + taken branch would occur on the branch invoking the + fsyscall-handler, at which point, by definition, a syscall + restart is still safe. If the system call number is invalid, + the fsys-mode handler will return directly to user-level. This + return will trigger a taken-branch trap, but since the trap is + taken _after_ restoring the privilege level, the CPU has already + left fsys-mode, so no special treatment is needed. +PSR.rt Unchanged. +PSR.cpl Cleared to 0. +PSR.is Unchanged (guaranteed to be 0 on entry to the gate page). +PSR.mc Unchanged. +PSR.it Unchanged (guaranteed to be 1). +PSR.id Unchanged. Note: the ia64 linux kernel never sets this bit. +PSR.da Unchanged. Note: the ia64 linux kernel never sets this bit. +PSR.dd Unchanged. Note: the ia64 linux kernel never sets this bit. +PSR.ss Lazy redirect. If set, "epc" will cause a Single Step Trap to + be taken. The trap handler then modifies the saved machine + state such that execution resumes in the gate page at + syscall_via_break(), with privilege level 3. +PSR.ri Unchanged. +PSR.ed Unchanged. Note: This bit could only have an effect if an fsys-mode + handler performed a speculative load that gets NaTted. If so, this + would be the normal & expected behavior, so no special treatment is + needed. +PSR.bn Unchanged. Note: fsys-mode handlers may clear the bit, if needed. + Doing so requires clearing PSR.i and PSR.ic as well. +PSR.ia Unchanged. Note: the ia64 linux kernel never sets this bit. +======= ======================================================================= + +Using fast system calls +======================= + +To use fast system calls, userspace applications need simply call +__kernel_syscall_via_epc(). For example + +-- example fgettimeofday() call -- + +-- fgettimeofday.S -- + +:: + + #include + + GLOBAL_ENTRY(fgettimeofday) + .prologue + .save ar.pfs, r11 + mov r11 = ar.pfs + .body + + mov r2 = 0xa000000000020660;; // gate address + // found by inspection of System.map for the + // __kernel_syscall_via_epc() function. See + // below for how to do this for real. + + mov b7 = r2 + mov r15 = 1087 // gettimeofday syscall + ;; + br.call.sptk.many b6 = b7 + ;; + + .restore sp + + mov ar.pfs = r11 + br.ret.sptk.many rp;; // return to caller + END(fgettimeofday) + +-- end fgettimeofday.S -- + +In reality, getting the gate address is accomplished by two extra +values passed via the ELF auxiliary vector (include/asm-ia64/elf.h) + + * AT_SYSINFO : is the address of __kernel_syscall_via_epc() + * AT_SYSINFO_EHDR : is the address of the kernel gate ELF DSO + +The ELF DSO is a pre-linked library that is mapped in by the kernel at +the gate page. It is a proper ELF shared object so, with a dynamic +loader that recognises the library, you should be able to make calls to +the exported functions within it as with any other shared library. +AT_SYSINFO points into the kernel DSO at the +__kernel_syscall_via_epc() function for historical reasons (it was +used before the kernel DSO) and as a convenience. diff --git a/Documentation/arch/ia64/ia64.rst b/Documentation/arch/ia64/ia64.rst new file mode 100644 index 000000000000..b725019a9492 --- /dev/null +++ b/Documentation/arch/ia64/ia64.rst @@ -0,0 +1,49 @@ +=========================================== +Linux kernel release for the IA-64 Platform +=========================================== + + These are the release notes for Linux since version 2.4 for IA-64 + platform. This document provides information specific to IA-64 + ONLY, to get additional information about the Linux kernel also + read the original Linux README provided with the kernel. + +Installing the Kernel +===================== + + - IA-64 kernel installation is the same as the other platforms, see + original README for details. + + +Software Requirements +===================== + + Compiling and running this kernel requires an IA-64 compliant GCC + compiler. And various software packages also compiled with an + IA-64 compliant GCC compiler. + + +Configuring the Kernel +====================== + + Configuration is the same, see original README for details. + + +Compiling the Kernel: + + - Compiling this kernel doesn't differ from other platform so read + the original README for details BUT make sure you have an IA-64 + compliant GCC compiler. + +IA-64 Specifics +=============== + + - General issues: + + * Hardly any performance tuning has been done. Obvious targets + include the library routines (IP checksum, etc.). Less + obvious targets include making sure we don't flush the TLB + needlessly, etc. + + * SMP locks cleanup/optimization + + * IA32 support. Currently experimental. It mostly works. diff --git a/Documentation/arch/ia64/index.rst b/Documentation/arch/ia64/index.rst new file mode 100644 index 000000000000..761f2154dfa2 --- /dev/null +++ b/Documentation/arch/ia64/index.rst @@ -0,0 +1,19 @@ +.. SPDX-License-Identifier: GPL-2.0 + +================== +IA-64 Architecture +================== + +.. toctree:: + :maxdepth: 1 + + ia64 + aliasing + efirtc + err_inject + fsys + irq-redir + mca + serial + + features diff --git a/Documentation/arch/ia64/irq-redir.rst b/Documentation/arch/ia64/irq-redir.rst new file mode 100644 index 000000000000..6bbbbe4f73ef --- /dev/null +++ b/Documentation/arch/ia64/irq-redir.rst @@ -0,0 +1,80 @@ +============================== +IRQ affinity on IA64 platforms +============================== + +07.01.2002, Erich Focht + + +By writing to /proc/irq/IRQ#/smp_affinity the interrupt routing can be +controlled. The behavior on IA64 platforms is slightly different from +that described in Documentation/core-api/irq/irq-affinity.rst for i386 systems. + +Because of the usage of SAPIC mode and physical destination mode the +IRQ target is one particular CPU and cannot be a mask of several +CPUs. Only the first non-zero bit is taken into account. + + +Usage examples +============== + +The target CPU has to be specified as a hexadecimal CPU mask. The +first non-zero bit is the selected CPU. This format has been kept for +compatibility reasons with i386. + +Set the delivery mode of interrupt 41 to fixed and route the +interrupts to CPU #3 (logical CPU number) (2^3=0x08):: + + echo "8" >/proc/irq/41/smp_affinity + +Set the default route for IRQ number 41 to CPU 6 in lowest priority +delivery mode (redirectable):: + + echo "r 40" >/proc/irq/41/smp_affinity + +The output of the command:: + + cat /proc/irq/IRQ#/smp_affinity + +gives the target CPU mask for the specified interrupt vector. If the CPU +mask is preceded by the character "r", the interrupt is redirectable +(i.e. lowest priority mode routing is used), otherwise its route is +fixed. + + + +Initialization and default behavior +=================================== + +If the platform features IRQ redirection (info provided by SAL) all +IO-SAPIC interrupts are initialized with CPU#0 as their default target +and the routing is the so called "lowest priority mode" (actually +fixed SAPIC mode with hint). The XTP chipset registers are used as hints +for the IRQ routing. Currently in Linux XTP registers can have three +values: + + - minimal for an idle task, + - normal if any other task runs, + - maximal if the CPU is going to be switched off. + +The IRQ is routed to the CPU with lowest XTP register value, the +search begins at the default CPU. Therefore most of the interrupts +will be handled by CPU #0. + +If the platform doesn't feature interrupt redirection IOSAPIC fixed +routing is used. The target CPUs are distributed in a round robin +manner. IRQs will be routed only to the selected target CPUs. Check +with:: + + cat /proc/interrupts + + + +Comments +======== + +On large (multi-node) systems it is recommended to route the IRQs to +the node to which the corresponding device is connected. +For systems like the NEC AzusA we get IRQ node-affinity for free. This +is because usually the chipsets on each node redirect the interrupts +only to their own CPUs (as they cannot see the XTP registers on the +other nodes). diff --git a/Documentation/arch/ia64/mca.rst b/Documentation/arch/ia64/mca.rst new file mode 100644 index 000000000000..08270bba44a4 --- /dev/null +++ b/Documentation/arch/ia64/mca.rst @@ -0,0 +1,198 @@ +============================================================= +An ad-hoc collection of notes on IA64 MCA and INIT processing +============================================================= + +Feel free to update it with notes about any area that is not clear. + +--- + +MCA/INIT are completely asynchronous. They can occur at any time, when +the OS is in any state. Including when one of the cpus is already +holding a spinlock. Trying to get any lock from MCA/INIT state is +asking for deadlock. Also the state of structures that are protected +by locks is indeterminate, including linked lists. + +--- + +The complicated ia64 MCA process. All of this is mandated by Intel's +specification for ia64 SAL, error recovery and unwind, it is not as +if we have a choice here. + +* MCA occurs on one cpu, usually due to a double bit memory error. + This is the monarch cpu. + +* SAL sends an MCA rendezvous interrupt (which is a normal interrupt) + to all the other cpus, the slaves. + +* Slave cpus that receive the MCA interrupt call down into SAL, they + end up spinning disabled while the MCA is being serviced. + +* If any slave cpu was already spinning disabled when the MCA occurred + then it cannot service the MCA interrupt. SAL waits ~20 seconds then + sends an unmaskable INIT event to the slave cpus that have not + already rendezvoused. + +* Because MCA/INIT can be delivered at any time, including when the cpu + is down in PAL in physical mode, the registers at the time of the + event are _completely_ undefined. In particular the MCA/INIT + handlers cannot rely on the thread pointer, PAL physical mode can + (and does) modify TP. It is allowed to do that as long as it resets + TP on return. However MCA/INIT events expose us to these PAL + internal TP changes. Hence curr_task(). + +* If an MCA/INIT event occurs while the kernel was running (not user + space) and the kernel has called PAL then the MCA/INIT handler cannot + assume that the kernel stack is in a fit state to be used. Mainly + because PAL may or may not maintain the stack pointer internally. + Because the MCA/INIT handlers cannot trust the kernel stack, they + have to use their own, per-cpu stacks. The MCA/INIT stacks are + preformatted with just enough task state to let the relevant handlers + do their job. + +* Unlike most other architectures, the ia64 struct task is embedded in + the kernel stack[1]. So switching to a new kernel stack means that + we switch to a new task as well. Because various bits of the kernel + assume that current points into the struct task, switching to a new + stack also means a new value for current. + +* Once all slaves have rendezvoused and are spinning disabled, the + monarch is entered. The monarch now tries to diagnose the problem + and decide if it can recover or not. + +* Part of the monarch's job is to look at the state of all the other + tasks. The only way to do that on ia64 is to call the unwinder, + as mandated by Intel. + +* The starting point for the unwind depends on whether a task is + running or not. That is, whether it is on a cpu or is blocked. The + monarch has to determine whether or not a task is on a cpu before it + knows how to start unwinding it. The tasks that received an MCA or + INIT event are no longer running, they have been converted to blocked + tasks. But (and its a big but), the cpus that received the MCA + rendezvous interrupt are still running on their normal kernel stacks! + +* To distinguish between these two cases, the monarch must know which + tasks are on a cpu and which are not. Hence each slave cpu that + switches to an MCA/INIT stack, registers its new stack using + set_curr_task(), so the monarch can tell that the _original_ task is + no longer running on that cpu. That gives us a decent chance of + getting a valid backtrace of the _original_ task. + +* MCA/INIT can be nested, to a depth of 2 on any cpu. In the case of a + nested error, we want diagnostics on the MCA/INIT handler that + failed, not on the task that was originally running. Again this + requires set_curr_task() so the MCA/INIT handlers can register their + own stack as running on that cpu. Then a recursive error gets a + trace of the failing handler's "task". + +[1] + My (Keith Owens) original design called for ia64 to separate its + struct task and the kernel stacks. Then the MCA/INIT data would be + chained stacks like i386 interrupt stacks. But that required + radical surgery on the rest of ia64, plus extra hard wired TLB + entries with its associated performance degradation. David + Mosberger vetoed that approach. Which meant that separate kernel + stacks meant separate "tasks" for the MCA/INIT handlers. + +--- + +INIT is less complicated than MCA. Pressing the nmi button or using +the equivalent command on the management console sends INIT to all +cpus. SAL picks one of the cpus as the monarch and the rest are +slaves. All the OS INIT handlers are entered at approximately the same +time. The OS monarch prints the state of all tasks and returns, after +which the slaves return and the system resumes. + +At least that is what is supposed to happen. Alas there are broken +versions of SAL out there. Some drive all the cpus as monarchs. Some +drive them all as slaves. Some drive one cpu as monarch, wait for that +cpu to return from the OS then drive the rest as slaves. Some versions +of SAL cannot even cope with returning from the OS, they spin inside +SAL on resume. The OS INIT code has workarounds for some of these +broken SAL symptoms, but some simply cannot be fixed from the OS side. + +--- + +The scheduler hooks used by ia64 (curr_task, set_curr_task) are layer +violations. Unfortunately MCA/INIT start off as massive layer +violations (can occur at _any_ time) and they build from there. + +At least ia64 makes an attempt at recovering from hardware errors, but +it is a difficult problem because of the asynchronous nature of these +errors. When processing an unmaskable interrupt we sometimes need +special code to cope with our inability to take any locks. + +--- + +How is ia64 MCA/INIT different from x86 NMI? + +* x86 NMI typically gets delivered to one cpu. MCA/INIT gets sent to + all cpus. + +* x86 NMI cannot be nested. MCA/INIT can be nested, to a depth of 2 + per cpu. + +* x86 has a separate struct task which points to one of multiple kernel + stacks. ia64 has the struct task embedded in the single kernel + stack, so switching stack means switching task. + +* x86 does not call the BIOS so the NMI handler does not have to worry + about any registers having changed. MCA/INIT can occur while the cpu + is in PAL in physical mode, with undefined registers and an undefined + kernel stack. + +* i386 backtrace is not very sensitive to whether a process is running + or not. ia64 unwind is very, very sensitive to whether a process is + running or not. + +--- + +What happens when MCA/INIT is delivered what a cpu is running user +space code? + +The user mode registers are stored in the RSE area of the MCA/INIT on +entry to the OS and are restored from there on return to SAL, so user +mode registers are preserved across a recoverable MCA/INIT. Since the +OS has no idea what unwind data is available for the user space stack, +MCA/INIT never tries to backtrace user space. Which means that the OS +does not bother making the user space process look like a blocked task, +i.e. the OS does not copy pt_regs and switch_stack to the user space +stack. Also the OS has no idea how big the user space RSE and memory +stacks are, which makes it too risky to copy the saved state to a user +mode stack. + +--- + +How do we get a backtrace on the tasks that were running when MCA/INIT +was delivered? + +mca.c:::ia64_mca_modify_original_stack(). That identifies and +verifies the original kernel stack, copies the dirty registers from +the MCA/INIT stack's RSE to the original stack's RSE, copies the +skeleton struct pt_regs and switch_stack to the original stack, fills +in the skeleton structures from the PAL minstate area and updates the +original stack's thread.ksp. That makes the original stack look +exactly like any other blocked task, i.e. it now appears to be +sleeping. To get a backtrace, just start with thread.ksp for the +original task and unwind like any other sleeping task. + +--- + +How do we identify the tasks that were running when MCA/INIT was +delivered? + +If the previous task has been verified and converted to a blocked +state, then sos->prev_task on the MCA/INIT stack is updated to point to +the previous task. You can look at that field in dumps or debuggers. +To help distinguish between the handler and the original tasks, +handlers have _TIF_MCA_INIT set in thread_info.flags. + +The sos data is always in the MCA/INIT handler stack, at offset +MCA_SOS_OFFSET. You can get that value from mca_asm.h or calculate it +as KERNEL_STACK_SIZE - sizeof(struct pt_regs) - sizeof(struct +ia64_sal_os_state), with 16 byte alignment for all structures. + +Also the comm field of the MCA/INIT task is modified to include the pid +of the original task, for humans to use. For example, a comm field of +'MCA 12159' means that pid 12159 was running when the MCA was +delivered. diff --git a/Documentation/arch/ia64/serial.rst b/Documentation/arch/ia64/serial.rst new file mode 100644 index 000000000000..1de70c305a79 --- /dev/null +++ b/Documentation/arch/ia64/serial.rst @@ -0,0 +1,165 @@ +============== +Serial Devices +============== + +Serial Device Naming +==================== + + As of 2.6.10, serial devices on ia64 are named based on the + order of ACPI and PCI enumeration. The first device in the + ACPI namespace (if any) becomes /dev/ttyS0, the second becomes + /dev/ttyS1, etc., and PCI devices are named sequentially + starting after the ACPI devices. + + Prior to 2.6.10, there were confusing exceptions to this: + + - Firmware on some machines (mostly from HP) provides an HCDP + table[1] that tells the kernel about devices that can be used + as a serial console. If the user specified "console=ttyS0" + or the EFI ConOut path contained only UART devices, the + kernel registered the device described by the HCDP as + /dev/ttyS0. + + - If there was no HCDP, we assumed there were UARTs at the + legacy COM port addresses (I/O ports 0x3f8 and 0x2f8), so + the kernel registered those as /dev/ttyS0 and /dev/ttyS1. + + Any additional ACPI or PCI devices were registered sequentially + after /dev/ttyS0 as they were discovered. + + With an HCDP, device names changed depending on EFI configuration + and "console=" arguments. Without an HCDP, device names didn't + change, but we registered devices that might not really exist. + + For example, an HP rx1600 with a single built-in serial port + (described in the ACPI namespace) plus an MP[2] (a PCI device) has + these ports: + + ========== ========== ============ ============ ======= + Type MMIO pre-2.6.10 pre-2.6.10 2.6.10+ + address + (EFI console (EFI console + on builtin) on MP port) + ========== ========== ============ ============ ======= + builtin 0xff5e0000 ttyS0 ttyS1 ttyS0 + MP UPS 0xf8031000 ttyS1 ttyS2 ttyS1 + MP Console 0xf8030000 ttyS2 ttyS0 ttyS2 + MP 2 0xf8030010 ttyS3 ttyS3 ttyS3 + MP 3 0xf8030038 ttyS4 ttyS4 ttyS4 + ========== ========== ============ ============ ======= + +Console Selection +================= + + EFI knows what your console devices are, but it doesn't tell the + kernel quite enough to actually locate them. The DIG64 HCDP + table[1] does tell the kernel where potential serial console + devices are, but not all firmware supplies it. Also, EFI supports + multiple simultaneous consoles and doesn't tell the kernel which + should be the "primary" one. + + So how do you tell Linux which console device to use? + + - If your firmware supplies the HCDP, it is simplest to + configure EFI with a single device (either a UART or a VGA + card) as the console. Then you don't need to tell Linux + anything; the kernel will automatically use the EFI console. + + (This works only in 2.6.6 or later; prior to that you had + to specify "console=ttyS0" to get a serial console.) + + - Without an HCDP, Linux defaults to a VGA console unless you + specify a "console=" argument. + + NOTE: Don't assume that a serial console device will be /dev/ttyS0. + It might be ttyS1, ttyS2, etc. Make sure you have the appropriate + entries in /etc/inittab (for getty) and /etc/securetty (to allow + root login). + +Early Serial Console +==================== + + The kernel can't start using a serial console until it knows where + the device lives. Normally this happens when the driver enumerates + all the serial devices, which can happen a minute or more after the + kernel starts booting. + + 2.6.10 and later kernels have an "early uart" driver that works + very early in the boot process. The kernel will automatically use + this if the user supplies an argument like "console=uart,io,0x3f8", + or if the EFI console path contains only a UART device and the + firmware supplies an HCDP. + +Troubleshooting Serial Console Problems +======================================= + + No kernel output after elilo prints "Uncompressing Linux... done": + + - You specified "console=ttyS0" but Linux changed the device + to which ttyS0 refers. Configure exactly one EFI console + device[3] and remove the "console=" option. + + - The EFI console path contains both a VGA device and a UART. + EFI and elilo use both, but Linux defaults to VGA. Remove + the VGA device from the EFI console path[3]. + + - Multiple UARTs selected as EFI console devices. EFI and + elilo use all selected devices, but Linux uses only one. + Make sure only one UART is selected in the EFI console + path[3]. + + - You're connected to an HP MP port[2] but have a non-MP UART + selected as EFI console device. EFI uses the MP as a + console device even when it isn't explicitly selected. + Either move the console cable to the non-MP UART, or change + the EFI console path[3] to the MP UART. + + Long pause (60+ seconds) between "Uncompressing Linux... done" and + start of kernel output: + + - No early console because you used "console=ttyS". Remove + the "console=" option if your firmware supplies an HCDP. + + - If you don't have an HCDP, the kernel doesn't know where + your console lives until the driver discovers serial + devices. Use "console=uart,io,0x3f8" (or appropriate + address for your machine). + + Kernel and init script output works fine, but no "login:" prompt: + + - Add getty entry to /etc/inittab for console tty. Look for + the "Adding console on ttyS" message that tells you which + device is the console. + + "login:" prompt, but can't login as root: + + - Add entry to /etc/securetty for console tty. + + No ACPI serial devices found in 2.6.17 or later: + + - Turn on CONFIG_PNP and CONFIG_PNPACPI. Prior to 2.6.17, ACPI + serial devices were discovered by 8250_acpi. In 2.6.17, + 8250_acpi was replaced by the combination of 8250_pnp and + CONFIG_PNPACPI. + + + +[1] + http://www.dig64.org/specifications/agreement + The table was originally defined as the "HCDP" for "Headless + Console/Debug Port." The current version is the "PCDP" for + "Primary Console and Debug Port Devices." + +[2] + The HP MP (management processor) is a PCI device that provides + several UARTs. One of the UARTs is often used as a console; the + EFI Boot Manager identifies it as "Acpi(HWP0002,700)/Pci(...)/Uart". + The external connection is usually a 25-pin connector, and a + special dongle converts that to three 9-pin connectors, one of + which is labelled "Console." + +[3] + EFI console devices are configured using the EFI Boot Manager + "Boot option maintenance" menu. You may have to interrupt the + boot sequence to use this menu, and you will have to reset the + box after changing console configuration. diff --git a/Documentation/arch/index.rst b/Documentation/arch/index.rst index 3f9962e45c09..41a66a8b38e4 100644 --- a/Documentation/arch/index.rst +++ b/Documentation/arch/index.rst @@ -12,6 +12,7 @@ implementation. arc/index arm/index arm64/index + ia64/index loongarch/index m68k/index mips/index diff --git a/Documentation/block/ioprio.rst b/Documentation/block/ioprio.rst index 4662e1ff3d81..2de8e227d292 100644 --- a/Documentation/block/ioprio.rst +++ b/Documentation/block/ioprio.rst @@ -79,6 +79,9 @@ ionice.c tool:: #elif defined(__x86_64__) #define __NR_ioprio_set 251 #define __NR_ioprio_get 252 + #elif defined(__ia64__) + #define __NR_ioprio_set 1274 + #define __NR_ioprio_get 1275 #else #error "Unsupported arch" #endif diff --git a/Documentation/core-api/cpu_hotplug.rst b/Documentation/core-api/cpu_hotplug.rst index dcb0e379e5e8..9511e405aabd 100644 --- a/Documentation/core-api/cpu_hotplug.rst +++ b/Documentation/core-api/cpu_hotplug.rst @@ -40,6 +40,12 @@ Command Line Switches supplied here is lower than the number of physically available CPUs, then those CPUs can not be brought online later. +``additional_cpus=n`` + Use this to limit hotpluggable CPUs. This option sets + ``cpu_possible_mask = cpu_present_mask + additional_cpus`` + + This option is limited to the IA64 architecture. + ``possible_cpus=n`` This option sets ``possible_cpus`` bits in ``cpu_possible_mask``. diff --git a/Documentation/core-api/debugging-via-ohci1394.rst b/Documentation/core-api/debugging-via-ohci1394.rst index cb3d3228dfc8..981ad4f89fd3 100644 --- a/Documentation/core-api/debugging-via-ohci1394.rst +++ b/Documentation/core-api/debugging-via-ohci1394.rst @@ -23,9 +23,9 @@ Retrieving a full system memory dump is also possible over the FireWire, using data transfer rates in the order of 10MB/s or more. With most FireWire controllers, memory access is limited to the low 4 GB -of physical address space. This can be a problem on machines where memory is -located mostly above that limit, but it is rarely a problem on more common -hardware such as x86, x86-64 and PowerPC. +of physical address space. This can be a problem on IA64 machines where +memory is located mostly above that limit, but it is rarely a problem on +more common hardware such as x86, x86-64 and PowerPC. At least LSI FW643e and FW643e2 controllers are known to support access to physical addresses above 4 GB, but this feature is currently not enabled by diff --git a/Documentation/features/core/cBPF-JIT/arch-support.txt b/Documentation/features/core/cBPF-JIT/arch-support.txt index 937840080de7..0a1f5bb7eeb9 100644 --- a/Documentation/features/core/cBPF-JIT/arch-support.txt +++ b/Documentation/features/core/cBPF-JIT/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/eBPF-JIT/arch-support.txt b/Documentation/features/core/eBPF-JIT/arch-support.txt index 7434b43c2ff8..6c0f3d759e6a 100644 --- a/Documentation/features/core/eBPF-JIT/arch-support.txt +++ b/Documentation/features/core/eBPF-JIT/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/generic-idle-thread/arch-support.txt b/Documentation/features/core/generic-idle-thread/arch-support.txt index 0735cb5367b4..0b94099cf6ac 100644 --- a/Documentation/features/core/generic-idle-thread/arch-support.txt +++ b/Documentation/features/core/generic-idle-thread/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | + | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/jump-labels/arch-support.txt b/Documentation/features/core/jump-labels/arch-support.txt index ccada815569f..94d9dece580f 100644 --- a/Documentation/features/core/jump-labels/arch-support.txt +++ b/Documentation/features/core/jump-labels/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/thread-info-in-task/arch-support.txt b/Documentation/features/core/thread-info-in-task/arch-support.txt index 2afeb6bf6e64..97c65ed2ac23 100644 --- a/Documentation/features/core/thread-info-in-task/arch-support.txt +++ b/Documentation/features/core/thread-info-in-task/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/core/tracehook/arch-support.txt b/Documentation/features/core/tracehook/arch-support.txt index a72330e25542..aed5679da651 100644 --- a/Documentation/features/core/tracehook/arch-support.txt +++ b/Documentation/features/core/tracehook/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | + | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/KASAN/arch-support.txt b/Documentation/features/debug/KASAN/arch-support.txt index 39c6e78c0cbe..c4581c2edb28 100644 --- a/Documentation/features/debug/KASAN/arch-support.txt +++ b/Documentation/features/debug/KASAN/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt index bbf029f095cb..9ec5d13f4939 100644 --- a/Documentation/features/debug/debug-vm-pgtable/arch-support.txt +++ b/Documentation/features/debug/debug-vm-pgtable/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/gcov-profile-all/arch-support.txt b/Documentation/features/debug/gcov-profile-all/arch-support.txt index 63494bddc263..dc4014f7e1f8 100644 --- a/Documentation/features/debug/gcov-profile-all/arch-support.txt +++ b/Documentation/features/debug/gcov-profile-all/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/debug/kcov/arch-support.txt b/Documentation/features/debug/kcov/arch-support.txt index 4449e1f55cd8..de84cefbcdd3 100644 --- a/Documentation/features/debug/kcov/arch-support.txt +++ b/Documentation/features/debug/kcov/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/kgdb/arch-support.txt b/Documentation/features/debug/kgdb/arch-support.txt index f287f16ce0ec..5e91ec78c80b 100644 --- a/Documentation/features/debug/kgdb/arch-support.txt +++ b/Documentation/features/debug/kgdb/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | ok | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/debug/kmemleak/arch-support.txt b/Documentation/features/debug/kmemleak/arch-support.txt index f45149cfa313..4e205ef70363 100644 --- a/Documentation/features/debug/kmemleak/arch-support.txt +++ b/Documentation/features/debug/kmemleak/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt index 02febc883588..38a0a54b79f5 100644 --- a/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt +++ b/Documentation/features/debug/kprobes-on-ftrace/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | TODO | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/kprobes/arch-support.txt b/Documentation/features/debug/kprobes/arch-support.txt index 1ea27aedd098..aad83b57587a 100644 --- a/Documentation/features/debug/kprobes/arch-support.txt +++ b/Documentation/features/debug/kprobes/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/kretprobes/arch-support.txt b/Documentation/features/debug/kretprobes/arch-support.txt index 022be42e64f9..61380010a4a7 100644 --- a/Documentation/features/debug/kretprobes/arch-support.txt +++ b/Documentation/features/debug/kretprobes/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/optprobes/arch-support.txt b/Documentation/features/debug/optprobes/arch-support.txt index 92f5d0f444fa..83a4639a5c0a 100644 --- a/Documentation/features/debug/optprobes/arch-support.txt +++ b/Documentation/features/debug/optprobes/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/stackprotector/arch-support.txt b/Documentation/features/debug/stackprotector/arch-support.txt index de8f43f2e5d6..4c64c5d596f7 100644 --- a/Documentation/features/debug/stackprotector/arch-support.txt +++ b/Documentation/features/debug/stackprotector/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/uprobes/arch-support.txt b/Documentation/features/debug/uprobes/arch-support.txt index 0c698003ce9c..24c8423b0abc 100644 --- a/Documentation/features/debug/uprobes/arch-support.txt +++ b/Documentation/features/debug/uprobes/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/debug/user-ret-profiler/arch-support.txt b/Documentation/features/debug/user-ret-profiler/arch-support.txt index 3e431767581d..059110a5fa6e 100644 --- a/Documentation/features/debug/user-ret-profiler/arch-support.txt +++ b/Documentation/features/debug/user-ret-profiler/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/io/dma-contiguous/arch-support.txt b/Documentation/features/io/dma-contiguous/arch-support.txt index 3c6ce35d704f..bfe0921a3853 100644 --- a/Documentation/features/io/dma-contiguous/arch-support.txt +++ b/Documentation/features/io/dma-contiguous/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/locking/cmpxchg-local/arch-support.txt b/Documentation/features/locking/cmpxchg-local/arch-support.txt index 2c3a4b91f16d..68329e96dffa 100644 --- a/Documentation/features/locking/cmpxchg-local/arch-support.txt +++ b/Documentation/features/locking/cmpxchg-local/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/locking/lockdep/arch-support.txt b/Documentation/features/locking/lockdep/arch-support.txt index b6b00469f7d0..a36e231670d7 100644 --- a/Documentation/features/locking/lockdep/arch-support.txt +++ b/Documentation/features/locking/lockdep/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/locking/queued-rwlocks/arch-support.txt b/Documentation/features/locking/queued-rwlocks/arch-support.txt index b286a5fff283..5deb845477e4 100644 --- a/Documentation/features/locking/queued-rwlocks/arch-support.txt +++ b/Documentation/features/locking/queued-rwlocks/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/locking/queued-spinlocks/arch-support.txt b/Documentation/features/locking/queued-spinlocks/arch-support.txt index 22f2990392ff..2d3961bfef5d 100644 --- a/Documentation/features/locking/queued-spinlocks/arch-support.txt +++ b/Documentation/features/locking/queued-spinlocks/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/perf/kprobes-event/arch-support.txt b/Documentation/features/perf/kprobes-event/arch-support.txt index 713a69fcd697..641a7d2ff2a3 100644 --- a/Documentation/features/perf/kprobes-event/arch-support.txt +++ b/Documentation/features/perf/kprobes-event/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/perf/perf-regs/arch-support.txt b/Documentation/features/perf/perf-regs/arch-support.txt index 09431518b0e8..33866eb242c1 100644 --- a/Documentation/features/perf/perf-regs/arch-support.txt +++ b/Documentation/features/perf/perf-regs/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/perf/perf-stackdump/arch-support.txt b/Documentation/features/perf/perf-stackdump/arch-support.txt index f9db4dd8ef79..c8e4c7c65012 100644 --- a/Documentation/features/perf/perf-stackdump/arch-support.txt +++ b/Documentation/features/perf/perf-stackdump/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/sched/membarrier-sync-core/arch-support.txt b/Documentation/features/sched/membarrier-sync-core/arch-support.txt index d96b778b87ed..23260ca44946 100644 --- a/Documentation/features/sched/membarrier-sync-core/arch-support.txt +++ b/Documentation/features/sched/membarrier-sync-core/arch-support.txt @@ -35,6 +35,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/sched/numa-balancing/arch-support.txt b/Documentation/features/sched/numa-balancing/arch-support.txt index 984601c7c479..532cc67cdf92 100644 --- a/Documentation/features/sched/numa-balancing/arch-support.txt +++ b/Documentation/features/sched/numa-balancing/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | .. | | hexagon: | .. | + | ia64: | TODO | | loongarch: | ok | | m68k: | .. | | microblaze: | .. | diff --git a/Documentation/features/seccomp/seccomp-filter/arch-support.txt b/Documentation/features/seccomp/seccomp-filter/arch-support.txt index 13feb679649e..3a7237b989cd 100644 --- a/Documentation/features/seccomp/seccomp-filter/arch-support.txt +++ b/Documentation/features/seccomp/seccomp-filter/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | ok | | microblaze: | TODO | diff --git a/Documentation/features/time/arch-tick-broadcast/arch-support.txt b/Documentation/features/time/arch-tick-broadcast/arch-support.txt index ccba965e8d07..9bffac80019e 100644 --- a/Documentation/features/time/arch-tick-broadcast/arch-support.txt +++ b/Documentation/features/time/arch-tick-broadcast/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/time/clockevents/arch-support.txt b/Documentation/features/time/clockevents/arch-support.txt index 4d4bfac52970..625160048f68 100644 --- a/Documentation/features/time/clockevents/arch-support.txt +++ b/Documentation/features/time/clockevents/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | ok | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | ok | diff --git a/Documentation/features/time/context-tracking/arch-support.txt b/Documentation/features/time/context-tracking/arch-support.txt index 891be9f61903..72bc5bad0348 100644 --- a/Documentation/features/time/context-tracking/arch-support.txt +++ b/Documentation/features/time/context-tracking/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/time/irq-time-acct/arch-support.txt b/Documentation/features/time/irq-time-acct/arch-support.txt index 3d10075a8a8a..ceb036610d09 100644 --- a/Documentation/features/time/irq-time-acct/arch-support.txt +++ b/Documentation/features/time/irq-time-acct/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | .. | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/time/virt-cpuacct/arch-support.txt b/Documentation/features/time/virt-cpuacct/arch-support.txt index 21f11d47ef72..c063dffd5261 100644 --- a/Documentation/features/time/virt-cpuacct/arch-support.txt +++ b/Documentation/features/time/virt-cpuacct/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | ok | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/ELF-ASLR/arch-support.txt b/Documentation/features/vm/ELF-ASLR/arch-support.txt index 57406c0d5353..47909c3dd602 100644 --- a/Documentation/features/vm/ELF-ASLR/arch-support.txt +++ b/Documentation/features/vm/ELF-ASLR/arch-support.txt @@ -13,6 +13,7 @@ | arm64: | ok | | csky: | ok | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/PG_uncached/arch-support.txt b/Documentation/features/vm/PG_uncached/arch-support.txt index 5a7508b8c967..5acd64b97dba 100644 --- a/Documentation/features/vm/PG_uncached/arch-support.txt +++ b/Documentation/features/vm/PG_uncached/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | TODO | | csky: | TODO | | hexagon: | TODO | + | ia64: | ok | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/THP/arch-support.txt b/Documentation/features/vm/THP/arch-support.txt index b4a5ce16940d..9dd7d75d0465 100644 --- a/Documentation/features/vm/THP/arch-support.txt +++ b/Documentation/features/vm/THP/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | .. | | hexagon: | .. | + | ia64: | TODO | | loongarch: | ok | | m68k: | .. | | microblaze: | .. | diff --git a/Documentation/features/vm/TLB/arch-support.txt b/Documentation/features/vm/TLB/arch-support.txt index d222bd3ee749..25cc35b00924 100644 --- a/Documentation/features/vm/TLB/arch-support.txt +++ b/Documentation/features/vm/TLB/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | .. | | microblaze: | .. | diff --git a/Documentation/features/vm/huge-vmap/arch-support.txt b/Documentation/features/vm/huge-vmap/arch-support.txt index 2d6de7b04538..34647d9bdca4 100644 --- a/Documentation/features/vm/huge-vmap/arch-support.txt +++ b/Documentation/features/vm/huge-vmap/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | TODO | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/ioremap_prot/arch-support.txt b/Documentation/features/vm/ioremap_prot/arch-support.txt index 1638c2cb17f1..a24149e59d73 100644 --- a/Documentation/features/vm/ioremap_prot/arch-support.txt +++ b/Documentation/features/vm/ioremap_prot/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/features/vm/pte_special/arch-support.txt b/Documentation/features/vm/pte_special/arch-support.txt index 3f777f8b67d5..d2b22a06945e 100644 --- a/Documentation/features/vm/pte_special/arch-support.txt +++ b/Documentation/features/vm/pte_special/arch-support.txt @@ -12,6 +12,7 @@ | arm64: | ok | | csky: | TODO | | hexagon: | TODO | + | ia64: | TODO | | loongarch: | ok | | m68k: | TODO | | microblaze: | TODO | diff --git a/Documentation/kbuild/makefiles.rst b/Documentation/kbuild/makefiles.rst index ad118b7a1806..e9299971220a 100644 --- a/Documentation/kbuild/makefiles.rst +++ b/Documentation/kbuild/makefiles.rst @@ -53,7 +53,7 @@ knowledge about the kernel Makefiles, plus detailed knowledge about the public interface for kbuild. *Arch developers* are people who work on an entire architecture, such -as sparc or x86. Arch developers need to know about the arch Makefile +as sparc or ia64. Arch developers need to know about the arch Makefile as well as kbuild Makefiles. *Kbuild developers* are people who work on the kernel build system itself. diff --git a/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst b/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst index d731b5a98561..c5673ec4559b 100644 --- a/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst +++ b/Documentation/networking/device_drivers/ethernet/neterion/s2io.rst @@ -64,8 +64,8 @@ c. Multi-buffer receive mode. Scattering of packet across multiple IBM xSeries). d. MSI/MSI-X. Can be enabled on platforms which support this feature - resulting in noticeable performance improvement (up to 7% on certain - platforms). + (IA64, Xeon) resulting in noticeable performance improvement(up to 7% + on certain platforms). e. Statistics. Comprehensive MAC-level and software statistics displayed using "ethtool -S" option. diff --git a/Documentation/scheduler/sched-arch.rst b/Documentation/scheduler/sched-arch.rst index ed07efea7d02..505cd27f9a92 100644 --- a/Documentation/scheduler/sched-arch.rst +++ b/Documentation/scheduler/sched-arch.rst @@ -10,7 +10,7 @@ Context switch By default, the switch_to arch function is called with the runqueue locked. This is usually not a problem unless switch_to may need to take the runqueue lock. This is usually due to a wake up operation in -the context switch. +the context switch. See arch/ia64/include/asm/switch_to.h for an example. To request the scheduler call switch_to with the runqueue unlocked, you must `#define __ARCH_WANT_UNLOCKED_CTXSW` in a header file @@ -68,5 +68,7 @@ Possible arch/ problems Possible arch problems I found (and either tried to fix or didn't): +ia64 - is safe_halt call racy vs interrupts? (does it sleep?) (See #4a) + sparc - IRQs on at this point(?), change local_irq_save to _disable. - TODO: needs secondary CPUs to disable preempt (See #1) diff --git a/Documentation/trace/kprobes.rst b/Documentation/trace/kprobes.rst index e1636e579c9c..7927742f13d3 100644 --- a/Documentation/trace/kprobes.rst +++ b/Documentation/trace/kprobes.rst @@ -315,6 +315,7 @@ architectures: - i386 (Supports jump optimization) - x86_64 (AMD-64, EM64T) (Supports jump optimization) - ppc64 +- ia64 (Does not support probes on instruction slot1.) - sparc64 (Return probes not yet implemented.) - arm - ppc diff --git a/Documentation/translations/zh_CN/arch/index.rst b/Documentation/translations/zh_CN/arch/index.rst index 71186d9df7c9..c4c2e16f629c 100644 --- a/Documentation/translations/zh_CN/arch/index.rst +++ b/Documentation/translations/zh_CN/arch/index.rst @@ -18,6 +18,7 @@ TODOList: * arm/index +* ia64/index * m68k/index * nios2/index * powerpc/index diff --git a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst index bc0d7ea6d834..4772a900c37a 100644 --- a/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst +++ b/Documentation/translations/zh_CN/core-api/cpu_hotplug.rst @@ -49,6 +49,12 @@ CPU热拔插支持的一个更新颖的用途是它在SMP的暂停恢复支持 限制内核将支持的CPU总量。如果这里提供的数量低于实际可用的CPU数量,那么其他CPU 以后就不能上线了。 +``additional_cpus=n`` + 使用它来限制可热插拔的CPU。该选项设置 + ``cpu_possible_mask = cpu_present_mask + additional_cpus`` + + 这个选项只限于IA64架构。 + ``possible_cpus=n`` 这个选项设置 ``cpu_possible_mask`` 中的 ``possible_cpus`` 位。 diff --git a/Documentation/translations/zh_CN/scheduler/sched-arch.rst b/Documentation/translations/zh_CN/scheduler/sched-arch.rst index b2ac3c743a3a..ce3f39d9b3cb 100644 --- a/Documentation/translations/zh_CN/scheduler/sched-arch.rst +++ b/Documentation/translations/zh_CN/scheduler/sched-arch.rst @@ -20,7 +20,8 @@ ========== 1. 运行队列锁 默认情况下,switch_to arch函数在调用时锁定了运行队列。这通常不是一个问题,除非 -switch_to可能需要获取运行队列锁。这通常是由于上下文切换中的唤醒操作造成的。 +switch_to可能需要获取运行队列锁。这通常是由于上下文切换中的唤醒操作造成的。见 +arch/ia64/include/asm/switch_to.h的例子。 为了要求调度器在运行队列解锁的情况下调用switch_to,你必须在头文件 中`#define __ARCH_WANT_UNLOCKED_CTXSW`(通常是定义switch_to的那个文件)。 @@ -67,5 +68,7 @@ arch/x86/kernel/process.c有轮询和睡眠空闲函数的例子。 我发现的可能的arch问题(并试图解决或没有解决)。: +ia64 - safe_halt的调用与中断相比,是否很荒谬? (它睡眠了吗) (参考 #4a) + sparc - 在这一点上,IRQ是开着的(?),把local_irq_save改为_disable。 - 待办事项: 需要第二个CPU来禁用抢占 (参考 #1) diff --git a/MAINTAINERS b/MAINTAINERS index 1aabf1c15bb3..2169ea3bd078 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10199,6 +10199,12 @@ F: Documentation/driver-api/i3c F: drivers/i3c/ F: include/linux/i3c/ +IA64 (Itanium) PLATFORM +L: linux-ia64@vger.kernel.org +S: Orphan +F: Documentation/arch/ia64/ +F: arch/ia64/ + IBM Operation Panel Input Driver M: Eddie James L: linux-input@vger.kernel.org @@ -16685,6 +16691,11 @@ L: linux-i2c@vger.kernel.org S: Maintained F: drivers/i2c/muxes/i2c-mux-pca9541.c +PCDP - PRIMARY CONSOLE AND DEBUG PORT +M: Khalid Aziz +S: Maintained +F: drivers/firmware/pcdp.* + PCI DRIVER FOR AARDVARK (Marvell Armada 3700) M: Thomas Petazzoni M: Pali Rohár diff --git a/Makefile b/Makefile index c7ee53f4bf04..92ddd4f49d7e 100644 --- a/Makefile +++ b/Makefile @@ -375,7 +375,7 @@ include $(srctree)/scripts/subarch.include # When performing cross compilation for other architectures ARCH shall be set # to the target architecture. (See arch/* for the possibilities). # ARCH can be set during invocation of make: -# make ARCH=arm64 +# make ARCH=ia64 # Another way is to have ARCH set in the environment. # The default ARCH is the host where make is executed. @@ -383,7 +383,7 @@ include $(srctree)/scripts/subarch.include # during compilation. Only gcc and related bin-utils executables # are prefixed with $(CROSS_COMPILE). # CROSS_COMPILE can be set on the command line -# make CROSS_COMPILE=aarch64-linux-gnu- +# make CROSS_COMPILE=ia64-linux- # Alternatively CROSS_COMPILE can be set in the environment. # Default value for CROSS_COMPILE is not to prefix executables # Note: Some architectures assign CROSS_COMPILE in their arch/*/Makefile diff --git a/arch/Kconfig b/arch/Kconfig index a5af0edd3eb8..fcd607174777 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -306,8 +306,17 @@ config ARCH_HAS_CPU_PASID bool select IOMMU_MM_DATA +# Select if arch init_task must go in the __init_task_data section +config ARCH_TASK_STRUCT_ON_STACK + bool + +# Select if arch has its private alloc_task_struct() function +config ARCH_TASK_STRUCT_ALLOCATOR + bool + config HAVE_ARCH_THREAD_STRUCT_WHITELIST bool + depends on !ARCH_TASK_STRUCT_ALLOCATOR help An architecture should select this to provide hardened usercopy knowledge about what region of the thread_struct should be @@ -316,6 +325,10 @@ config HAVE_ARCH_THREAD_STRUCT_WHITELIST should be implemented. Without this, the entire thread_struct field in task_struct will be left whitelisted. +# Select if arch has its private alloc_thread_stack() function +config ARCH_THREAD_STACK_ALLOCATOR + bool + # Select if arch wants to size task_struct dynamically via arch_task_struct_size: config ARCH_WANTS_DYNAMIC_TASK_STRUCT bool @@ -1081,6 +1094,7 @@ config HAVE_ARCH_COMPAT_MMAP_BASES config PAGE_SIZE_LESS_THAN_64KB def_bool y depends on !ARM64_64K_PAGES + depends on !IA64_PAGE_SIZE_64KB depends on !PAGE_SIZE_64KB depends on !PARISC_PAGE_SIZE_64KB depends on PAGE_SIZE_LESS_THAN_256KB diff --git a/arch/ia64/Kbuild b/arch/ia64/Kbuild new file mode 100644 index 000000000000..e77cc76d228c --- /dev/null +++ b/arch/ia64/Kbuild @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +obj-y += kernel/ mm/ +obj-$(CONFIG_IA64_SGI_UV) += uv/ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig new file mode 100644 index 000000000000..53faa122b0f4 --- /dev/null +++ b/arch/ia64/Kconfig @@ -0,0 +1,394 @@ +# SPDX-License-Identifier: GPL-2.0 +config PGTABLE_LEVELS + int "Page Table Levels" if !IA64_PAGE_SIZE_64KB + range 3 4 if !IA64_PAGE_SIZE_64KB + default 3 + +menu "Processor type and features" + +config IA64 + bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS + select ARCH_HAS_CPU_FINALIZE_INIT + select ARCH_HAS_DMA_MARK_CLEAN + select ARCH_HAS_STRNCPY_FROM_USER + select ARCH_HAS_STRNLEN_USER + select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_MIGHT_HAVE_PC_SERIO + select ACPI + select ACPI_NUMA if NUMA + select ARCH_ENABLE_MEMORY_HOTPLUG + select ARCH_ENABLE_MEMORY_HOTREMOVE + select ARCH_SUPPORTS_ACPI + select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI + select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI + select FORCE_PCI + select PCI_DOMAINS if PCI + select PCI_MSI + select PCI_SYSCALL if PCI + select HAS_IOPORT + select HAVE_ASM_MODVERSIONS + select HAVE_UNSTABLE_SCHED_CLOCK + select HAVE_EXIT_THREAD + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_DYNAMIC_FTRACE if (!ITANIUM) + select HAVE_FUNCTION_TRACER + select HAVE_SETUP_PER_CPU_AREA + select TTY + select HAVE_ARCH_TRACEHOOK + select HAVE_FUNCTION_DESCRIPTORS + select HAVE_VIRT_CPU_ACCOUNTING + select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE + select GENERIC_IRQ_PROBE + select GENERIC_PENDING_IRQ if SMP + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_LEGACY + select ARCH_HAVE_NMI_SAFE_CMPXCHG + select GENERIC_IOMAP + select GENERIC_IOREMAP + select GENERIC_SMP_IDLE_THREAD + select ARCH_TASK_STRUCT_ON_STACK + select ARCH_TASK_STRUCT_ALLOCATOR + select ARCH_THREAD_STACK_ALLOCATOR + select ARCH_CLOCKSOURCE_DATA + select GENERIC_TIME_VSYSCALL + select LEGACY_TIMER_TICK + select SWIOTLB + select SYSCTL_ARCH_UNALIGN_NO_WARN + select HAVE_MOD_ARCH_SPECIFIC + select MODULES_USE_ELF_RELA + select ARCH_USE_CMPXCHG_LOCKREF + select HAVE_ARCH_AUDITSYSCALL + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH + select NUMA if !FLATMEM + select PCI_MSI_ARCH_FALLBACKS if PCI_MSI + select ZONE_DMA32 + select FUNCTION_ALIGNMENT_32B + default y + help + The Itanium Processor Family is Intel's 64-bit successor to + the 32-bit X86 line. The IA-64 Linux project has a home + page at and a mailing list at + . + +config 64BIT + bool + select ATA_NONSTANDARD if ATA + default y + +config MMU + bool + default y + +config STACKTRACE_SUPPORT + def_bool y + +config GENERIC_LOCKBREAK + def_bool n + +config GENERIC_CALIBRATE_DELAY + bool + default y + +config DMI + bool + default y + select DMI_SCAN_MACHINE_NON_EFI_FALLBACK + +config EFI + bool + select UCS2_STRING + default y + +config SCHED_OMIT_FRAME_POINTER + bool + default y + +config IA64_UNCACHED_ALLOCATOR + bool + select GENERIC_ALLOCATOR + +config ARCH_USES_PG_UNCACHED + def_bool y + depends on IA64_UNCACHED_ALLOCATOR + +config AUDIT_ARCH + bool + default y + +choice + prompt "Processor type" + default ITANIUM + +config ITANIUM + bool "Itanium" + help + Select your IA-64 processor type. The default is Itanium. + This choice is safe for all IA-64 systems, but may not perform + optimally on systems with, say, Itanium 2 or newer processors. + +config MCKINLEY + bool "Itanium 2" + help + Select this to configure for an Itanium 2 (McKinley) processor. + +endchoice + +choice + prompt "Kernel page size" + default IA64_PAGE_SIZE_16KB + +config IA64_PAGE_SIZE_4KB + bool "4KB" + help + This lets you select the page size of the kernel. For best IA-64 + performance, a page size of 8KB or 16KB is recommended. For best + IA-32 compatibility, a page size of 4KB should be selected (the vast + majority of IA-32 binaries work perfectly fine with a larger page + size). For Itanium 2 or newer systems, a page size of 64KB can also + be selected. + + 4KB For best IA-32 compatibility + 8KB For best IA-64 performance + 16KB For best IA-64 performance + 64KB Requires Itanium 2 or newer processor. + + If you don't know what to do, choose 16KB. + +config IA64_PAGE_SIZE_8KB + bool "8KB" + +config IA64_PAGE_SIZE_16KB + bool "16KB" + +config IA64_PAGE_SIZE_64KB + depends on !ITANIUM + bool "64KB" + +endchoice + +source "kernel/Kconfig.hz" + +config IA64_BRL_EMU + bool + depends on ITANIUM + default y + +# align cache-sensitive data to 128 bytes +config IA64_L1_CACHE_SHIFT + int + default "7" if MCKINLEY + default "6" if ITANIUM + +config IA64_SGI_UV + bool "SGI-UV support" + help + Selecting this option will add specific support for running on SGI + UV based systems. If you have an SGI UV system or are building a + distro kernel, select this option. + +config IA64_HP_SBA_IOMMU + bool "HP SBA IOMMU support" + select DMA_OPS + default y + help + Say Y here to add support for the SBA IOMMU found on HP zx1 and + sx1000 systems. If you're unsure, answer Y. + +config IA64_CYCLONE + bool "Cyclone (EXA) Time Source support" + help + Say Y here to enable support for IBM EXA Cyclone time source. + If you're unsure, answer N. + +config ARCH_FORCE_MAX_ORDER + int + default "16" if HUGETLB_PAGE + default "10" + +config SMP + bool "Symmetric multi-processing support" + help + This enables support for systems with more than one CPU. If you have + a system with only one CPU, say N. If you have a system with more + than one CPU, say Y. + + If you say N here, the kernel will run on single and multiprocessor + systems, but will use only one CPU of a multiprocessor system. If + you say Y here, the kernel will run on many, but not all, + single processor systems. On a single processor system, the kernel + will run faster if you say N here. + + See also the SMP-HOWTO available at + . + + If you don't know what to do here, say N. + +config NR_CPUS + int "Maximum number of CPUs (2-4096)" + range 2 4096 + depends on SMP + default "4096" + help + You should set this to the number of CPUs in your system, but + keep in mind that a kernel compiled for, e.g., 2 CPUs will boot but + only use 2 CPUs on a >2 CPU system. Setting this to a value larger + than 64 will cause the use of a CPU mask array, causing a small + performance hit. + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP + default n + help + Say Y here to experiment with turning CPUs off and on. CPUs + can be controlled through /sys/devices/system/cpu/cpu#. + Say N if you want to disable CPU hotplug. + +config SCHED_SMT + bool "SMT scheduler support" + depends on SMP + help + Improves the CPU scheduler's decision making when dealing with + Intel IA64 chips with MultiThreading at a cost of slightly increased + overhead in some places. If unsure say N here. + +config PERMIT_BSP_REMOVE + bool "Support removal of Bootstrap Processor" + depends on HOTPLUG_CPU + default n + help + Say Y here if your platform SAL will support removal of BSP with HOTPLUG_CPU + support. + +config FORCE_CPEI_RETARGET + bool "Force assumption that CPEI can be re-targeted" + depends on PERMIT_BSP_REMOVE + default n + help + Say Y if you need to force the assumption that CPEI can be re-targeted to + any cpu in the system. This hint is available via ACPI 3.0 specifications. + Tiger4 systems are capable of re-directing CPEI to any CPU other than BSP. + This option it useful to enable this feature on older BIOS's as well. + You can also enable this by using boot command line option force_cpei=1. + +config ARCH_SELECT_MEMORY_MODEL + def_bool y + +config ARCH_FLATMEM_ENABLE + def_bool y + +config ARCH_SPARSEMEM_ENABLE + def_bool y + select SPARSEMEM_VMEMMAP_ENABLE + +config ARCH_SPARSEMEM_DEFAULT + def_bool y + depends on ARCH_SPARSEMEM_ENABLE + +config NUMA + bool "NUMA support" + depends on !FLATMEM + select SMP + select USE_PERCPU_NUMA_NODE_ID + help + Say Y to compile the kernel to support NUMA (Non-Uniform Memory + Access). This option is for configuring high-end multiprocessor + server systems. If in doubt, say N. + +config NODES_SHIFT + int "Max num nodes shift(3-10)" + range 3 10 + default "10" + depends on NUMA + help + This option specifies the maximum number of nodes in your SSI system. + MAX_NUMNODES will be 2^(This value). + If in doubt, use the default. + +config HAVE_ARCH_NODEDATA_EXTENSION + def_bool y + depends on NUMA + +config HAVE_MEMORYLESS_NODES + def_bool NUMA + +config ARCH_PROC_KCORE_TEXT + def_bool y + depends on PROC_KCORE + +config IA64_MCA_RECOVERY + bool "MCA recovery from errors other than TLB." + +config IA64_PALINFO + tristate "/proc/pal support" + help + If you say Y here, you are able to get PAL (Processor Abstraction + Layer) information in /proc/pal. This contains useful information + about the processors in your systems, such as cache and TLB sizes + and the PAL firmware version in use. + + To use this option, you have to ensure that the "/proc file system + support" (CONFIG_PROC_FS) is enabled, too. + +config IA64_MC_ERR_INJECT + tristate "MC error injection support" + help + Adds support for MC error injection. If enabled, the kernel + will provide a sysfs interface for user applications to + call MC error injection PAL procedures to inject various errors. + This is a useful tool for MCA testing. + + If you're unsure, do not select this option. + +config IA64_ESI + bool "ESI (Extensible SAL Interface) support" + help + If you say Y here, support is built into the kernel to + make ESI calls. ESI calls are used to support vendor-specific + firmware extensions, such as the ability to inject memory-errors + for test-purposes. If you're unsure, say N. + +config IA64_HP_AML_NFW + bool "Support ACPI AML calls to native firmware" + help + This driver installs a global ACPI Operation Region handler for + region 0xA1. AML methods can use this OpRegion to call arbitrary + native firmware functions. The driver installs the OpRegion + handler if there is an HPQ5001 device or if the user supplies + the "force" module parameter, e.g., with the "aml_nfw.force" + kernel command line option. + +endmenu + +config ARCH_SUPPORTS_KEXEC + def_bool !SMP || HOTPLUG_CPU + +config ARCH_SUPPORTS_CRASH_DUMP + def_bool IA64_MCA_RECOVERY && (!SMP || HOTPLUG_CPU) + +menu "Power management and ACPI options" + +source "kernel/power/Kconfig" + +source "drivers/acpi/Kconfig" + +if PM +menu "CPU Frequency scaling" +source "drivers/cpufreq/Kconfig" +endmenu +endif + +endmenu + +config MSPEC + tristate "Memory special operations driver" + depends on IA64 + select IA64_UNCACHED_ALLOCATOR + help + If you have an ia64 and you want to enable memory special + operations support (formerly known as fetchop), say Y here, + otherwise say N. diff --git a/arch/ia64/Kconfig.debug b/arch/ia64/Kconfig.debug new file mode 100644 index 000000000000..2ce008e2d164 --- /dev/null +++ b/arch/ia64/Kconfig.debug @@ -0,0 +1,55 @@ +# SPDX-License-Identifier: GPL-2.0 + +choice + prompt "Physical memory granularity" + default IA64_GRANULE_64MB + +config IA64_GRANULE_16MB + bool "16MB" + help + IA-64 identity-mapped regions use a large page size called "granules". + + Select "16MB" for a small granule size. + Select "64MB" for a large granule size. This is the current default. + +config IA64_GRANULE_64MB + bool "64MB" + depends on BROKEN + +endchoice + +config IA64_PRINT_HAZARDS + bool "Print possible IA-64 dependency violations to console" + depends on DEBUG_KERNEL + help + Selecting this option prints more information for Illegal Dependency + Faults, that is, for Read-after-Write (RAW), Write-after-Write (WAW), + or Write-after-Read (WAR) violations. This option is ignored if you + are compiling for an Itanium A step processor + (CONFIG_ITANIUM_ASTEP_SPECIFIC). If you're unsure, select Y. + +config DISABLE_VHPT + bool "Disable VHPT" + depends on DEBUG_KERNEL + help + The Virtual Hash Page Table (VHPT) enhances virtual address + translation performance. Normally you want the VHPT active but you + can select this option to disable the VHPT for debugging. If you're + unsure, answer N. + +config IA64_DEBUG_CMPXCHG + bool "Turn on compare-and-exchange bug checking (slow!)" + depends on DEBUG_KERNEL && PRINTK + help + Selecting this option turns on bug checking for the IA-64 + compare-and-exchange instructions. This is slow! Itaniums + from step B3 or later don't have this problem. If you're unsure, + select N. + +config IA64_DEBUG_IRQ + bool "Turn on irq debug checks (slow!)" + depends on DEBUG_KERNEL + help + Selecting this option turns on bug checking for the IA-64 irq_save + and restore instructions. It's useful for tracking down spinlock + problems, but slow! If you're unsure, select N. diff --git a/arch/ia64/Makefile b/arch/ia64/Makefile new file mode 100644 index 000000000000..d553ab7022fe --- /dev/null +++ b/arch/ia64/Makefile @@ -0,0 +1,82 @@ +# +# ia64/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1998-2004 by David Mosberger-Tang +# + +KBUILD_DEFCONFIG := generic_defconfig + +NM := $(CROSS_COMPILE)nm -B + +CHECKFLAGS += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__ + +OBJCOPYFLAGS := --strip-all +LDFLAGS_vmlinux := -static +KBUILD_AFLAGS_KERNEL := -mconstant-gp +EXTRA := + +cflags-y := -pipe $(EXTRA) -ffixed-r13 -mfixed-range=f12-f15,f32-f127 \ + -frename-registers -fno-optimize-sibling-calls +KBUILD_CFLAGS_KERNEL := -mconstant-gp + +GAS_STATUS = $(shell $(srctree)/arch/ia64/scripts/check-gas "$(CC)" "$(OBJDUMP)") +KBUILD_CPPFLAGS += $(shell $(srctree)/arch/ia64/scripts/toolchain-flags "$(CC)" "$(OBJDUMP)" "$(READELF)") + +ifeq ($(GAS_STATUS),buggy) +$(error Sorry, you need a newer version of the assember, one that is built from \ + a source-tree that post-dates 18-Dec-2002. You can find a pre-compiled \ + static binary of such an assembler at: \ + \ + ftp://ftp.hpl.hp.com/pub/linux-ia64/gas-030124.tar.gz) +endif + +quiet_cmd_gzip = GZIP $@ +cmd_gzip = cat $(real-prereqs) | $(KGZIP) -n -f -9 > $@ + +quiet_cmd_objcopy = OBJCOPY $@ +cmd_objcopy = $(OBJCOPY) $(OBJCOPYFLAGS) $(OBJCOPYFLAGS_$(@F)) $< $@ + +KBUILD_CFLAGS += $(cflags-y) + +libs-y += arch/ia64/lib/ + +drivers-y += arch/ia64/pci/ arch/ia64/hp/common/ + +PHONY += compressed check + +all: compressed unwcheck + +compressed: vmlinux.gz + +vmlinuz: vmlinux.gz + +vmlinux.gz: vmlinux.bin FORCE + $(call if_changed,gzip) + +vmlinux.bin: vmlinux FORCE + $(call if_changed,objcopy) + +unwcheck: vmlinux + -$(Q)READELF=$(READELF) $(PYTHON3) $(srctree)/arch/ia64/scripts/unwcheck.py $< + +archheaders: + $(Q)$(MAKE) $(build)=arch/ia64/kernel/syscalls all + +CLEAN_FILES += vmlinux.gz + +install: KBUILD_IMAGE := vmlinux.gz +install: + $(call cmd,install) + +define archhelp + echo '* compressed - Build compressed kernel image' + echo ' install - Install compressed kernel image' + echo '* unwcheck - Check vmlinux for invalid unwind info' +endef diff --git a/arch/ia64/configs/bigsur_defconfig b/arch/ia64/configs/bigsur_defconfig new file mode 100644 index 000000000000..7cb96db9a25d --- /dev/null +++ b/arch/ia64/configs/bigsur_defconfig @@ -0,0 +1,102 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_LOG_BUF_SHIFT=16 +CONFIG_PROFILING=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_SMP=y +CONFIG_NR_CPUS=2 +CONFIG_PREEMPT=y +CONFIG_IA64_PALINFO=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +# CONFIG_IPV6 is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=m +CONFIG_ATA=m +CONFIG_ATA_GENERIC=m +CONFIG_ATA_PIIX=m +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_SPI_ATTRS=m +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_RAID10=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +CONFIG_INPUT_EVDEV=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_I2C=y +CONFIG_I2C_CHARDEV=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_DRM=m +CONFIG_DRM_R128=m +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_CS4281=m +CONFIG_USB_HIDDEV=y +CONFIG_USB=m +CONFIG_USB_MON=m +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_ACM=m +CONFIG_USB_PRINTER=m +CONFIG_USB_STORAGE=m +CONFIG_EXT2_FS=y +CONFIG_EXT3_FS=y +CONFIG_XFS_FS=y +CONFIG_XFS_QUOTA=y +CONFIG_XFS_POSIX_ACL=y +CONFIG_AUTOFS_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_CIFS_XATTR=y +CONFIG_CIFS_POSIX=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_CRYPTO_MD5=y +CONFIG_CRYPTO_DES=y diff --git a/arch/ia64/configs/generic_defconfig b/arch/ia64/configs/generic_defconfig new file mode 100644 index 000000000000..4581240013dd --- /dev/null +++ b/arch/ia64/configs/generic_defconfig @@ -0,0 +1,206 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_CGROUPS=y +CONFIG_CPUSETS=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_MCKINLEY=y +CONFIG_IA64_PAGE_SIZE_64KB=y +CONFIG_IA64_CYCLONE=y +CONFIG_SMP=y +CONFIG_HOTPLUG_CPU=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_IA64_PALINFO=y +CONFIG_KEXEC=y +CONFIG_CRASH_DUMP=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_DOCK=y +CONFIG_ACPI_PROCESSOR=m +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_SYN_COOKIES=y +# CONFIG_IPV6 is not set +CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" +CONFIG_CONNECTOR=y +# CONFIG_PNP_DEBUG_MESSAGES is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_SGI_XP=m +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_CMD64X=y +CONFIG_ATA_PIIX=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_SATA_VITESSE=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=m +CONFIG_FUSION_SAS=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=m +CONFIG_E100=m +CONFIG_E1000=y +CONFIG_IGB=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_GAMEPORT=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_HPET=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_AGP_HP_ZX1=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m +CONFIG_SND_CS4281=m +CONFIG_SND_CS46XX=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_FM801=m +CONFIG_HID_GYRATION=m +CONFIG_HID_PANTHERLORD=m +CONFIG_HID_PETALYNX=m +CONFIG_HID_SAMSUNG=m +CONFIG_HID_SONY=m +CONFIG_HID_SUNPLUS=m +CONFIG_USB=m +CONFIG_USB_MON=m +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_STORAGE=m +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_INTEL_IOMMU=y +CONFIG_MSPEC=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_AUTOFS_FS=m +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_MD5=y +# CONFIG_CRYPTO_ANSI_CPRNG is not set +CONFIG_CRC_T10DIF=y diff --git a/arch/ia64/configs/gensparse_defconfig b/arch/ia64/configs/gensparse_defconfig new file mode 100644 index 000000000000..c9e806616544 --- /dev/null +++ b/arch/ia64/configs/gensparse_defconfig @@ -0,0 +1,184 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_MCKINLEY=y +CONFIG_IA64_CYCLONE=y +CONFIG_SMP=y +CONFIG_NR_CPUS=512 +CONFIG_HOTPLUG_CPU=y +CONFIG_SPARSEMEM_MANUAL=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_IA64_PALINFO=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_SYN_COOKIES=y +# CONFIG_IPV6 is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_CMD64X=y +CONFIG_ATA_PIIX=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_DM_MULTIPATH=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=m +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=m +CONFIG_E100=m +CONFIG_E1000=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_GAMEPORT=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_HPET=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_AGP_HP_ZX1=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_SOUND=m +CONFIG_SND=m +CONFIG_SND_SEQUENCER=m +CONFIG_SND_SEQ_DUMMY=m +CONFIG_SND_MIXER_OSS=m +CONFIG_SND_PCM_OSS=m +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_VERBOSE_PRINTK=y +CONFIG_SND_DUMMY=m +CONFIG_SND_VIRMIDI=m +CONFIG_SND_MTPAV=m +CONFIG_SND_SERIAL_U16550=m +CONFIG_SND_MPU401=m +CONFIG_SND_CS4281=m +CONFIG_SND_CS46XX=m +CONFIG_SND_EMU10K1=m +CONFIG_SND_FM801=m +CONFIG_USB=m +CONFIG_USB_MON=m +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=m +CONFIG_USB_STORAGE=m +CONFIG_INFINIBAND=m +CONFIG_INFINIBAND_MTHCA=m +CONFIG_INFINIBAND_IPOIB=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_AUTOFS_FS=y +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_CRYPTO_MD5=y diff --git a/arch/ia64/configs/tiger_defconfig b/arch/ia64/configs/tiger_defconfig new file mode 100644 index 000000000000..d7d8fb5c7b71 --- /dev/null +++ b/arch/ia64/configs/tiger_defconfig @@ -0,0 +1,169 @@ +CONFIG_SYSVIPC=y +CONFIG_POSIX_MQUEUE=y +CONFIG_IKCONFIG=y +CONFIG_IKCONFIG_PROC=y +CONFIG_LOG_BUF_SHIFT=20 +CONFIG_BLK_DEV_INITRD=y +CONFIG_KALLSYMS_ALL=y +CONFIG_MODULES=y +CONFIG_MODULE_UNLOAD=y +CONFIG_MODVERSIONS=y +CONFIG_MODULE_SRCVERSION_ALL=y +# CONFIG_BLK_DEV_BSG is not set +CONFIG_PARTITION_ADVANCED=y +CONFIG_SGI_PARTITION=y +CONFIG_MCKINLEY=y +CONFIG_IA64_PAGE_SIZE_64KB=y +CONFIG_IA64_CYCLONE=y +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_HOTPLUG_CPU=y +CONFIG_PERMIT_BSP_REMOVE=y +CONFIG_FORCE_CPEI_RETARGET=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_IA64_PALINFO=y +CONFIG_KEXEC=y +CONFIG_BINFMT_MISC=m +CONFIG_ACPI_BUTTON=m +CONFIG_ACPI_FAN=m +CONFIG_ACPI_PROCESSOR=m +CONFIG_HOTPLUG_PCI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +CONFIG_SYN_COOKIES=y +# CONFIG_IPV6 is not set +CONFIG_BLK_DEV_LOOP=m +CONFIG_BLK_DEV_NBD=m +CONFIG_BLK_DEV_RAM=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_CMD64X=y +CONFIG_ATA_PIIX=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=m +CONFIG_BLK_DEV_SR=m +CONFIG_CHR_DEV_SG=m +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_MD=y +CONFIG_BLK_DEV_MD=m +CONFIG_MD_LINEAR=m +CONFIG_MD_RAID0=m +CONFIG_MD_RAID1=m +CONFIG_MD_MULTIPATH=m +CONFIG_BLK_DEV_DM=m +CONFIG_DM_CRYPT=m +CONFIG_DM_SNAPSHOT=m +CONFIG_DM_MIRROR=m +CONFIG_DM_ZERO=m +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=y +CONFIG_FUSION_CTL=y +CONFIG_NETDEVICES=y +CONFIG_DUMMY=m +CONFIG_NETCONSOLE=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=m +CONFIG_E100=m +CONFIG_E1000=y +# CONFIG_SERIO_SERPORT is not set +CONFIG_GAMEPORT=m +CONFIG_SERIAL_NONSTANDARD=y +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=6 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_HPET=y +CONFIG_AGP=m +CONFIG_AGP_I460=m +CONFIG_DRM=m +CONFIG_DRM_TDFX=m +CONFIG_DRM_R128=m +CONFIG_DRM_RADEON=m +CONFIG_DRM_MGA=m +CONFIG_DRM_SIS=m +CONFIG_USB=y +CONFIG_USB_EHCI_HCD=m +CONFIG_USB_OHCI_HCD=m +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_STORAGE=m +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT2_FS_POSIX_ACL=y +CONFIG_EXT2_FS_SECURITY=y +CONFIG_EXT3_FS=y +CONFIG_EXT3_FS_POSIX_ACL=y +CONFIG_EXT3_FS_SECURITY=y +CONFIG_REISERFS_FS=y +CONFIG_REISERFS_FS_XATTR=y +CONFIG_REISERFS_FS_POSIX_ACL=y +CONFIG_REISERFS_FS_SECURITY=y +CONFIG_XFS_FS=y +CONFIG_AUTOFS_FS=y +CONFIG_ISO9660_FS=m +CONFIG_JOLIET=y +CONFIG_UDF_FS=m +CONFIG_VFAT_FS=y +CONFIG_NTFS_FS=m +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=m +CONFIG_NFS_V4=m +CONFIG_NFSD=m +CONFIG_NFSD_V4=y +CONFIG_CIFS=m +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=m +CONFIG_NLS_CODEPAGE_775=m +CONFIG_NLS_CODEPAGE_850=m +CONFIG_NLS_CODEPAGE_852=m +CONFIG_NLS_CODEPAGE_855=m +CONFIG_NLS_CODEPAGE_857=m +CONFIG_NLS_CODEPAGE_860=m +CONFIG_NLS_CODEPAGE_861=m +CONFIG_NLS_CODEPAGE_862=m +CONFIG_NLS_CODEPAGE_863=m +CONFIG_NLS_CODEPAGE_864=m +CONFIG_NLS_CODEPAGE_865=m +CONFIG_NLS_CODEPAGE_866=m +CONFIG_NLS_CODEPAGE_869=m +CONFIG_NLS_CODEPAGE_936=m +CONFIG_NLS_CODEPAGE_950=m +CONFIG_NLS_CODEPAGE_932=m +CONFIG_NLS_CODEPAGE_949=m +CONFIG_NLS_CODEPAGE_874=m +CONFIG_NLS_ISO8859_8=m +CONFIG_NLS_CODEPAGE_1250=m +CONFIG_NLS_CODEPAGE_1251=m +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=m +CONFIG_NLS_ISO8859_3=m +CONFIG_NLS_ISO8859_4=m +CONFIG_NLS_ISO8859_5=m +CONFIG_NLS_ISO8859_6=m +CONFIG_NLS_ISO8859_7=m +CONFIG_NLS_ISO8859_9=m +CONFIG_NLS_ISO8859_13=m +CONFIG_NLS_ISO8859_14=m +CONFIG_NLS_ISO8859_15=m +CONFIG_NLS_KOI8_R=m +CONFIG_NLS_KOI8_U=m +CONFIG_NLS_UTF8=m +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_IA64_GRANULE_16MB=y +CONFIG_CRYPTO_PCBC=m +CONFIG_CRYPTO_MD5=y diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig new file mode 100644 index 000000000000..ed104550d0d5 --- /dev/null +++ b/arch/ia64/configs/zx1_defconfig @@ -0,0 +1,148 @@ +CONFIG_SYSVIPC=y +CONFIG_BSD_PROCESS_ACCT=y +CONFIG_BLK_DEV_INITRD=y +CONFIG_KPROBES=y +CONFIG_MODULES=y +CONFIG_PARTITION_ADVANCED=y +CONFIG_MCKINLEY=y +CONFIG_SMP=y +CONFIG_NR_CPUS=16 +CONFIG_HOTPLUG_CPU=y +CONFIG_FLATMEM_MANUAL=y +CONFIG_IA64_MCA_RECOVERY=y +CONFIG_IA64_PALINFO=y +CONFIG_CRASH_DUMP=y +CONFIG_BINFMT_MISC=y +CONFIG_HOTPLUG_PCI=y +CONFIG_HOTPLUG_PCI_ACPI=y +CONFIG_NET=y +CONFIG_PACKET=y +CONFIG_UNIX=y +CONFIG_INET=y +CONFIG_IP_MULTICAST=y +# CONFIG_IPV6 is not set +CONFIG_NETFILTER=y +CONFIG_BLK_DEV_LOOP=y +CONFIG_BLK_DEV_RAM=y +CONFIG_ATA=y +CONFIG_ATA_GENERIC=y +CONFIG_PATA_CMD64X=y +CONFIG_SCSI=y +CONFIG_BLK_DEV_SD=y +CONFIG_CHR_DEV_ST=y +CONFIG_BLK_DEV_SR=y +CONFIG_CHR_DEV_SG=y +CONFIG_SCSI_CONSTANTS=y +CONFIG_SCSI_LOGGING=y +CONFIG_SCSI_FC_ATTRS=y +CONFIG_SCSI_SYM53C8XX_2=y +CONFIG_SCSI_QLOGIC_1280=y +CONFIG_FUSION=y +CONFIG_FUSION_SPI=y +CONFIG_FUSION_FC=y +CONFIG_FUSION_CTL=m +CONFIG_NETDEVICES=y +CONFIG_DUMMY=y +CONFIG_TIGON3=y +CONFIG_NET_TULIP=y +CONFIG_TULIP=y +CONFIG_TULIP_MWI=y +CONFIG_TULIP_MMIO=y +CONFIG_TULIP_NAPI=y +CONFIG_TULIP_NAPI_HW_MITIGATION=y +CONFIG_E100=y +CONFIG_E1000=y +CONFIG_INPUT_JOYDEV=y +CONFIG_INPUT_EVDEV=y +# CONFIG_INPUT_KEYBOARD is not set +# CONFIG_INPUT_MOUSE is not set +# CONFIG_SERIO_I8042 is not set +# CONFIG_SERIO_SERPORT is not set +CONFIG_SERIAL_8250=y +CONFIG_SERIAL_8250_CONSOLE=y +CONFIG_SERIAL_8250_NR_UARTS=8 +CONFIG_SERIAL_8250_EXTENDED=y +CONFIG_SERIAL_8250_SHARE_IRQ=y +# CONFIG_HW_RANDOM is not set +CONFIG_RTC_CLASS=y +CONFIG_RTC_DRV_EFI=y +CONFIG_I2C_CHARDEV=y +CONFIG_AGP=y +CONFIG_AGP_HP_ZX1=y +CONFIG_DRM=y +CONFIG_DRM_RADEON=y +CONFIG_FB_RADEON=y +CONFIG_FB_RADEON_DEBUG=y +CONFIG_LOGO=y +# CONFIG_LOGO_LINUX_MONO is not set +# CONFIG_LOGO_LINUX_VGA16 is not set +CONFIG_SOUND=y +CONFIG_SND=y +CONFIG_SND_SEQUENCER=y +CONFIG_SND_MIXER_OSS=y +CONFIG_SND_PCM_OSS=y +CONFIG_SND_SEQUENCER_OSS=y +CONFIG_SND_FM801=y +CONFIG_USB_HIDDEV=y +CONFIG_USB=y +CONFIG_USB_MON=y +CONFIG_USB_EHCI_HCD=y +CONFIG_USB_OHCI_HCD=y +CONFIG_USB_UHCI_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_EXT2_FS=y +CONFIG_EXT2_FS_XATTR=y +CONFIG_EXT3_FS=y +CONFIG_ISO9660_FS=y +CONFIG_JOLIET=y +CONFIG_UDF_FS=y +CONFIG_MSDOS_FS=y +CONFIG_VFAT_FS=y +CONFIG_PROC_KCORE=y +CONFIG_TMPFS=y +CONFIG_HUGETLBFS=y +CONFIG_NFS_FS=y +CONFIG_NFS_V4=y +CONFIG_NFSD=y +CONFIG_NLS_CODEPAGE_437=y +CONFIG_NLS_CODEPAGE_737=y +CONFIG_NLS_CODEPAGE_775=y +CONFIG_NLS_CODEPAGE_850=y +CONFIG_NLS_CODEPAGE_852=y +CONFIG_NLS_CODEPAGE_855=y +CONFIG_NLS_CODEPAGE_857=y +CONFIG_NLS_CODEPAGE_860=y +CONFIG_NLS_CODEPAGE_861=y +CONFIG_NLS_CODEPAGE_862=y +CONFIG_NLS_CODEPAGE_863=y +CONFIG_NLS_CODEPAGE_864=y +CONFIG_NLS_CODEPAGE_865=y +CONFIG_NLS_CODEPAGE_866=y +CONFIG_NLS_CODEPAGE_869=y +CONFIG_NLS_CODEPAGE_936=y +CONFIG_NLS_CODEPAGE_950=y +CONFIG_NLS_CODEPAGE_932=y +CONFIG_NLS_CODEPAGE_949=y +CONFIG_NLS_CODEPAGE_874=y +CONFIG_NLS_ISO8859_8=y +CONFIG_NLS_CODEPAGE_1251=y +CONFIG_NLS_ISO8859_1=y +CONFIG_NLS_ISO8859_2=y +CONFIG_NLS_ISO8859_3=y +CONFIG_NLS_ISO8859_4=y +CONFIG_NLS_ISO8859_5=y +CONFIG_NLS_ISO8859_6=y +CONFIG_NLS_ISO8859_7=y +CONFIG_NLS_ISO8859_9=y +CONFIG_NLS_ISO8859_13=y +CONFIG_NLS_ISO8859_14=y +CONFIG_NLS_ISO8859_15=y +CONFIG_NLS_KOI8_R=y +CONFIG_NLS_KOI8_U=y +CONFIG_NLS_UTF8=y +CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_KERNEL=y +CONFIG_DEBUG_MUTEXES=y +CONFIG_IA64_PRINT_HAZARDS=y +CONFIG_CRYPTO_ECB=m +CONFIG_CRYPTO_PCBC=m diff --git a/arch/ia64/hp/common/Makefile b/arch/ia64/hp/common/Makefile new file mode 100644 index 000000000000..11a56ed38229 --- /dev/null +++ b/arch/ia64/hp/common/Makefile @@ -0,0 +1,10 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# ia64/platform/hp/common/Makefile +# +# Copyright (C) 2002 Hewlett Packard +# Copyright (C) Alex Williamson (alex_williamson@hp.com) +# + +obj-$(CONFIG_IA64_HP_SBA_IOMMU) += sba_iommu.o +obj-$(CONFIG_IA64_HP_AML_NFW) += aml_nfw.o diff --git a/arch/ia64/hp/common/aml_nfw.c b/arch/ia64/hp/common/aml_nfw.c new file mode 100644 index 000000000000..901df49461a0 --- /dev/null +++ b/arch/ia64/hp/common/aml_nfw.c @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * OpRegion handler to allow AML to call native firmware + * + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + * + * This driver implements HP Open Source Review Board proposal 1842, + * which was approved on 9/20/2006. + * + * For technical documentation, see the HP SPPA Firmware EAS, Appendix F. + * + * ACPI does not define a mechanism for AML methods to call native firmware + * interfaces such as PAL or SAL. This OpRegion handler adds such a mechanism. + * After the handler is installed, an AML method can call native firmware by + * storing the arguments and firmware entry point to specific offsets in the + * OpRegion. When AML reads the "return value" offset from the OpRegion, this + * handler loads up the arguments, makes the firmware call, and returns the + * result. + */ + +#include +#include +#include + +MODULE_AUTHOR("Bjorn Helgaas "); +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("ACPI opregion handler for native firmware calls"); + +static bool force_register; +module_param_named(force, force_register, bool, 0); +MODULE_PARM_DESC(force, "Install opregion handler even without HPQ5001 device"); + +#define AML_NFW_SPACE 0xA1 + +struct ia64_pdesc { + void *ip; + void *gp; +}; + +/* + * N.B. The layout of this structure is defined in the HP SPPA FW EAS, and + * the member offsets are embedded in AML methods. + */ +struct ia64_nfw_context { + u64 arg[8]; + struct ia64_sal_retval ret; + u64 ip; + u64 gp; + u64 pad[2]; +}; + +static void *virt_map(u64 address) +{ + if (address & (1UL << 63)) + return (void *) (__IA64_UNCACHED_OFFSET | address); + + return __va(address); +} + +static void aml_nfw_execute(struct ia64_nfw_context *c) +{ + struct ia64_pdesc virt_entry; + ia64_sal_handler entry; + + virt_entry.ip = virt_map(c->ip); + virt_entry.gp = virt_map(c->gp); + + entry = (ia64_sal_handler) &virt_entry; + + IA64_FW_CALL(entry, c->ret, + c->arg[0], c->arg[1], c->arg[2], c->arg[3], + c->arg[4], c->arg[5], c->arg[6], c->arg[7]); +} + +static void aml_nfw_read_arg(u8 *offset, u32 bit_width, u64 *value) +{ + switch (bit_width) { + case 8: + *value = *(u8 *)offset; + break; + case 16: + *value = *(u16 *)offset; + break; + case 32: + *value = *(u32 *)offset; + break; + case 64: + *value = *(u64 *)offset; + break; + } +} + +static void aml_nfw_write_arg(u8 *offset, u32 bit_width, u64 *value) +{ + switch (bit_width) { + case 8: + *(u8 *) offset = *value; + break; + case 16: + *(u16 *) offset = *value; + break; + case 32: + *(u32 *) offset = *value; + break; + case 64: + *(u64 *) offset = *value; + break; + } +} + +static acpi_status aml_nfw_handler(u32 function, acpi_physical_address address, + u32 bit_width, u64 *value, void *handler_context, + void *region_context) +{ + struct ia64_nfw_context *context = handler_context; + u8 *offset = (u8 *) context + address; + + if (bit_width != 8 && bit_width != 16 && + bit_width != 32 && bit_width != 64) + return AE_BAD_PARAMETER; + + if (address + (bit_width >> 3) > sizeof(struct ia64_nfw_context)) + return AE_BAD_PARAMETER; + + switch (function) { + case ACPI_READ: + if (address == offsetof(struct ia64_nfw_context, ret)) + aml_nfw_execute(context); + aml_nfw_read_arg(offset, bit_width, value); + break; + case ACPI_WRITE: + aml_nfw_write_arg(offset, bit_width, value); + break; + } + + return AE_OK; +} + +static struct ia64_nfw_context global_context; +static int global_handler_registered; + +static int aml_nfw_add_global_handler(void) +{ + acpi_status status; + + if (global_handler_registered) + return 0; + + status = acpi_install_address_space_handler(ACPI_ROOT_OBJECT, + AML_NFW_SPACE, aml_nfw_handler, NULL, &global_context); + if (ACPI_FAILURE(status)) + return -ENODEV; + + global_handler_registered = 1; + printk(KERN_INFO "Global 0x%02X opregion handler registered\n", + AML_NFW_SPACE); + return 0; +} + +static int aml_nfw_remove_global_handler(void) +{ + acpi_status status; + + if (!global_handler_registered) + return 0; + + status = acpi_remove_address_space_handler(ACPI_ROOT_OBJECT, + AML_NFW_SPACE, aml_nfw_handler); + if (ACPI_FAILURE(status)) + return -ENODEV; + + global_handler_registered = 0; + printk(KERN_INFO "Global 0x%02X opregion handler removed\n", + AML_NFW_SPACE); + return 0; +} + +static int aml_nfw_add(struct acpi_device *device) +{ + /* + * We would normally allocate a new context structure and install + * the address space handler for the specific device we found. + * But the HP-UX implementation shares a single global context + * and always puts the handler at the root, so we'll do the same. + */ + return aml_nfw_add_global_handler(); +} + +static void aml_nfw_remove(struct acpi_device *device) +{ + aml_nfw_remove_global_handler(); +} + +static const struct acpi_device_id aml_nfw_ids[] = { + {"HPQ5001", 0}, + {"", 0} +}; + +static struct acpi_driver acpi_aml_nfw_driver = { + .name = "native firmware", + .ids = aml_nfw_ids, + .ops = { + .add = aml_nfw_add, + .remove = aml_nfw_remove, + }, +}; + +static int __init aml_nfw_init(void) +{ + int result; + + if (force_register) + aml_nfw_add_global_handler(); + + result = acpi_bus_register_driver(&acpi_aml_nfw_driver); + if (result < 0) { + aml_nfw_remove_global_handler(); + return result; + } + + return 0; +} + +static void __exit aml_nfw_exit(void) +{ + acpi_bus_unregister_driver(&acpi_aml_nfw_driver); + aml_nfw_remove_global_handler(); +} + +module_init(aml_nfw_init); +module_exit(aml_nfw_exit); diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c new file mode 100644 index 000000000000..c4d477e8bcd4 --- /dev/null +++ b/arch/ia64/hp/common/sba_iommu.c @@ -0,0 +1,2155 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* +** IA64 System Bus Adapter (SBA) I/O MMU manager +** +** (c) Copyright 2002-2005 Alex Williamson +** (c) Copyright 2002-2003 Grant Grundler +** (c) Copyright 2002-2005 Hewlett-Packard Company +** +** Portions (c) 2000 Grant Grundler (from parisc I/O MMU code) +** Portions (c) 1999 Dave S. Miller (from sparc64 I/O MMU code) +** +** +** +** This module initializes the IOC (I/O Controller) found on HP +** McKinley machines and their successors. +** +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include /* hweight64() */ +#include +#include +#include +#include +#include + +#include /* ia64_get_itc() */ +#include +#include /* PAGE_OFFSET */ +#include + +#include + +#define PFX "IOC: " + +/* +** Enabling timing search of the pdir resource map. Output in /proc. +** Disabled by default to optimize performance. +*/ +#undef PDIR_SEARCH_TIMING + +/* +** This option allows cards capable of 64bit DMA to bypass the IOMMU. If +** not defined, all DMA will be 32bit and go through the TLB. +** There's potentially a conflict in the bio merge code with us +** advertising an iommu, but then bypassing it. Since I/O MMU bypassing +** appears to give more performance than bio-level virtual merging, we'll +** do the former for now. NOTE: BYPASS_SG also needs to be undef'd to +** completely restrict DMA to the IOMMU. +*/ +#define ALLOW_IOV_BYPASS + +/* +** This option specifically allows/disallows bypassing scatterlists with +** multiple entries. Coalescing these entries can allow better DMA streaming +** and in some cases shows better performance than entirely bypassing the +** IOMMU. Performance increase on the order of 1-2% sequential output/input +** using bonnie++ on a RAID0 MD device (sym2 & mpt). +*/ +#undef ALLOW_IOV_BYPASS_SG + +/* +** If a device prefetches beyond the end of a valid pdir entry, it will cause +** a hard failure, ie. MCA. Version 3.0 and later of the zx1 LBA should +** disconnect on 4k boundaries and prevent such issues. If the device is +** particularly aggressive, this option will keep the entire pdir valid such +** that prefetching will hit a valid address. This could severely impact +** error containment, and is therefore off by default. The page that is +** used for spill-over is poisoned, so that should help debugging somewhat. +*/ +#undef FULL_VALID_PDIR + +#define ENABLE_MARK_CLEAN + +/* +** The number of debug flags is a clue - this code is fragile. NOTE: since +** tightening the use of res_lock the resource bitmap and actual pdir are no +** longer guaranteed to stay in sync. The sanity checking code isn't going to +** like that. +*/ +#undef DEBUG_SBA_INIT +#undef DEBUG_SBA_RUN +#undef DEBUG_SBA_RUN_SG +#undef DEBUG_SBA_RESOURCE +#undef ASSERT_PDIR_SANITY +#undef DEBUG_LARGE_SG_ENTRIES +#undef DEBUG_BYPASS + +#if defined(FULL_VALID_PDIR) && defined(ASSERT_PDIR_SANITY) +#error FULL_VALID_PDIR and ASSERT_PDIR_SANITY are mutually exclusive +#endif + +#define SBA_INLINE __inline__ +/* #define SBA_INLINE */ + +#ifdef DEBUG_SBA_INIT +#define DBG_INIT(x...) printk(x) +#else +#define DBG_INIT(x...) +#endif + +#ifdef DEBUG_SBA_RUN +#define DBG_RUN(x...) printk(x) +#else +#define DBG_RUN(x...) +#endif + +#ifdef DEBUG_SBA_RUN_SG +#define DBG_RUN_SG(x...) printk(x) +#else +#define DBG_RUN_SG(x...) +#endif + + +#ifdef DEBUG_SBA_RESOURCE +#define DBG_RES(x...) printk(x) +#else +#define DBG_RES(x...) +#endif + +#ifdef DEBUG_BYPASS +#define DBG_BYPASS(x...) printk(x) +#else +#define DBG_BYPASS(x...) +#endif + +#ifdef ASSERT_PDIR_SANITY +#define ASSERT(expr) \ + if(!(expr)) { \ + printk( "\n" __FILE__ ":%d: Assertion " #expr " failed!\n",__LINE__); \ + panic(#expr); \ + } +#else +#define ASSERT(expr) +#endif + +/* +** The number of pdir entries to "free" before issuing +** a read to PCOM register to flush out PCOM writes. +** Interacts with allocation granularity (ie 4 or 8 entries +** allocated and free'd/purged at a time might make this +** less interesting). +*/ +#define DELAYED_RESOURCE_CNT 64 + +#define PCI_DEVICE_ID_HP_SX2000_IOC 0x12ec + +#define ZX1_IOC_ID ((PCI_DEVICE_ID_HP_ZX1_IOC << 16) | PCI_VENDOR_ID_HP) +#define ZX2_IOC_ID ((PCI_DEVICE_ID_HP_ZX2_IOC << 16) | PCI_VENDOR_ID_HP) +#define REO_IOC_ID ((PCI_DEVICE_ID_HP_REO_IOC << 16) | PCI_VENDOR_ID_HP) +#define SX1000_IOC_ID ((PCI_DEVICE_ID_HP_SX1000_IOC << 16) | PCI_VENDOR_ID_HP) +#define SX2000_IOC_ID ((PCI_DEVICE_ID_HP_SX2000_IOC << 16) | PCI_VENDOR_ID_HP) + +#define ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ + +#define IOC_FUNC_ID 0x000 +#define IOC_FCLASS 0x008 /* function class, bist, header, rev... */ +#define IOC_IBASE 0x300 /* IO TLB */ +#define IOC_IMASK 0x308 +#define IOC_PCOM 0x310 +#define IOC_TCNFG 0x318 +#define IOC_PDIR_BASE 0x320 + +#define IOC_ROPE0_CFG 0x500 +#define IOC_ROPE_AO 0x10 /* Allow "Relaxed Ordering" */ + + +/* AGP GART driver looks for this */ +#define ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL + +/* +** The zx1 IOC supports 4/8/16/64KB page sizes (see TCNFG register) +** +** Some IOCs (sx1000) can run at the above pages sizes, but are +** really only supported using the IOC at a 4k page size. +** +** iovp_size could only be greater than PAGE_SIZE if we are +** confident the drivers really only touch the next physical +** page iff that driver instance owns it. +*/ +static unsigned long iovp_size; +static unsigned long iovp_shift; +static unsigned long iovp_mask; + +struct ioc { + void __iomem *ioc_hpa; /* I/O MMU base address */ + char *res_map; /* resource map, bit == pdir entry */ + u64 *pdir_base; /* physical base address */ + unsigned long ibase; /* pdir IOV Space base */ + unsigned long imask; /* pdir IOV Space mask */ + + unsigned long *res_hint; /* next avail IOVP - circular search */ + unsigned long dma_mask; + spinlock_t res_lock; /* protects the resource bitmap, but must be held when */ + /* clearing pdir to prevent races with allocations. */ + unsigned int res_bitshift; /* from the RIGHT! */ + unsigned int res_size; /* size of resource map in bytes */ +#ifdef CONFIG_NUMA + unsigned int node; /* node where this IOC lives */ +#endif +#if DELAYED_RESOURCE_CNT > 0 + spinlock_t saved_lock; /* may want to try to get this on a separate cacheline */ + /* than res_lock for bigger systems. */ + int saved_cnt; + struct sba_dma_pair { + dma_addr_t iova; + size_t size; + } saved[DELAYED_RESOURCE_CNT]; +#endif + +#ifdef PDIR_SEARCH_TIMING +#define SBA_SEARCH_SAMPLE 0x100 + unsigned long avg_search[SBA_SEARCH_SAMPLE]; + unsigned long avg_idx; /* current index into avg_search */ +#endif + + /* Stuff we don't need in performance path */ + struct ioc *next; /* list of IOC's in system */ + acpi_handle handle; /* for multiple IOC's */ + const char *name; + unsigned int func_id; + unsigned int rev; /* HW revision of chip */ + u32 iov_size; + unsigned int pdir_size; /* in bytes, determined by IOV Space size */ + struct pci_dev *sac_only_dev; +}; + +static struct ioc *ioc_list, *ioc_found; +static int reserve_sba_gart = 1; + +static SBA_INLINE void sba_mark_invalid(struct ioc *, dma_addr_t, size_t); +static SBA_INLINE void sba_free_range(struct ioc *, dma_addr_t, size_t); + +#define sba_sg_address(sg) sg_virt((sg)) + +#ifdef FULL_VALID_PDIR +static u64 prefetch_spill_page; +#endif + +#define GET_IOC(dev) ((dev_is_pci(dev)) \ + ? ((struct ioc *) PCI_CONTROLLER(to_pci_dev(dev))->iommu) : NULL) + +/* +** DMA_CHUNK_SIZE is used by the SCSI mid-layer to break up +** (or rather not merge) DMAs into manageable chunks. +** On parisc, this is more of the software/tuning constraint +** rather than the HW. I/O MMU allocation algorithms can be +** faster with smaller sizes (to some degree). +*/ +#define DMA_CHUNK_SIZE (BITS_PER_LONG*iovp_size) + +#define ROUNDUP(x,y) ((x + ((y)-1)) & ~((y)-1)) + +/************************************ +** SBA register read and write support +** +** BE WARNED: register writes are posted. +** (ie follow writes which must reach HW with a read) +** +*/ +#define READ_REG(addr) __raw_readq(addr) +#define WRITE_REG(val, addr) __raw_writeq(val, addr) + +#ifdef DEBUG_SBA_INIT + +/** + * sba_dump_tlb - debugging only - print IOMMU operating parameters + * @hpa: base address of the IOMMU + * + * Print the size/location of the IO MMU PDIR. + */ +static void +sba_dump_tlb(char *hpa) +{ + DBG_INIT("IO TLB at 0x%p\n", (void *)hpa); + DBG_INIT("IOC_IBASE : %016lx\n", READ_REG(hpa+IOC_IBASE)); + DBG_INIT("IOC_IMASK : %016lx\n", READ_REG(hpa+IOC_IMASK)); + DBG_INIT("IOC_TCNFG : %016lx\n", READ_REG(hpa+IOC_TCNFG)); + DBG_INIT("IOC_PDIR_BASE: %016lx\n", READ_REG(hpa+IOC_PDIR_BASE)); + DBG_INIT("\n"); +} +#endif + + +#ifdef ASSERT_PDIR_SANITY + +/** + * sba_dump_pdir_entry - debugging only - print one IOMMU PDIR entry + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * @pide: pdir index. + * + * Print one entry of the IO MMU PDIR in human readable form. + */ +static void +sba_dump_pdir_entry(struct ioc *ioc, char *msg, uint pide) +{ + /* start printing from lowest pde in rval */ + u64 *ptr = &ioc->pdir_base[pide & ~(BITS_PER_LONG - 1)]; + unsigned long *rptr = (unsigned long *) &ioc->res_map[(pide >>3) & -sizeof(unsigned long)]; + uint rcnt; + + printk(KERN_DEBUG "SBA: %s rp %p bit %d rval 0x%lx\n", + msg, rptr, pide & (BITS_PER_LONG - 1), *rptr); + + rcnt = 0; + while (rcnt < BITS_PER_LONG) { + printk(KERN_DEBUG "%s %2d %p %016Lx\n", + (rcnt == (pide & (BITS_PER_LONG - 1))) + ? " -->" : " ", + rcnt, ptr, (unsigned long long) *ptr ); + rcnt++; + ptr++; + } + printk(KERN_DEBUG "%s", msg); +} + + +/** + * sba_check_pdir - debugging only - consistency checker + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @msg: text to print ont the output line. + * + * Verify the resource map and pdir state is consistent + */ +static int +sba_check_pdir(struct ioc *ioc, char *msg) +{ + u64 *rptr_end = (u64 *) &(ioc->res_map[ioc->res_size]); + u64 *rptr = (u64 *) ioc->res_map; /* resource map ptr */ + u64 *pptr = ioc->pdir_base; /* pdir ptr */ + uint pide = 0; + + while (rptr < rptr_end) { + u64 rval; + int rcnt; /* number of bits we might check */ + + rval = *rptr; + rcnt = 64; + + while (rcnt) { + /* Get last byte and highest bit from that */ + u32 pde = ((u32)((*pptr >> (63)) & 0x1)); + if ((rval & 0x1) ^ pde) + { + /* + ** BUMMER! -- res_map != pdir -- + ** Dump rval and matching pdir entries + */ + sba_dump_pdir_entry(ioc, msg, pide); + return(1); + } + rcnt--; + rval >>= 1; /* try the next bit */ + pptr++; + pide++; + } + rptr++; /* look at next word of res_map */ + } + /* It'd be nice if we always got here :^) */ + return 0; +} + + +/** + * sba_dump_sg - debugging only - print Scatter-Gather list + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: head of the SG list + * @nents: number of entries in SG list + * + * print the SG list so we can verify it's correct by hand. + */ +static void +sba_dump_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + while (nents-- > 0) { + printk(KERN_DEBUG " %d : DMA %08lx/%05x CPU %p\n", nents, + startsg->dma_address, startsg->dma_length, + sba_sg_address(startsg)); + startsg = sg_next(startsg); + } +} + +static void +sba_check_sg( struct ioc *ioc, struct scatterlist *startsg, int nents) +{ + struct scatterlist *the_sg = startsg; + int the_nents = nents; + + while (the_nents-- > 0) { + if (sba_sg_address(the_sg) == 0x0UL) + sba_dump_sg(NULL, startsg, nents); + the_sg = sg_next(the_sg); + } +} + +#endif /* ASSERT_PDIR_SANITY */ + + + + +/************************************************************** +* +* I/O Pdir Resource Management +* +* Bits set in the resource map are in use. +* Each bit can represent a number of pages. +* LSbs represent lower addresses (IOVA's). +* +***************************************************************/ +#define PAGES_PER_RANGE 1 /* could increase this to 4 or 8 if needed */ + +/* Convert from IOVP to IOVA and vice versa. */ +#define SBA_IOVA(ioc,iovp,offset) ((ioc->ibase) | (iovp) | (offset)) +#define SBA_IOVP(ioc,iova) ((iova) & ~(ioc->ibase)) + +#define PDIR_ENTRY_SIZE sizeof(u64) + +#define PDIR_INDEX(iovp) ((iovp)>>iovp_shift) + +#define RESMAP_MASK(n) ~(~0UL << (n)) +#define RESMAP_IDX_MASK (sizeof(unsigned long) - 1) + + +/** + * For most cases the normal get_order is sufficient, however it limits us + * to PAGE_SIZE being the minimum mapping alignment and TC flush granularity. + * It only incurs about 1 clock cycle to use this one with the static variable + * and makes the code more intuitive. + */ +static SBA_INLINE int +get_iovp_order (unsigned long size) +{ + long double d = size - 1; + long order; + + order = ia64_getf_exp(d); + order = order - iovp_shift - 0xffff + 1; + if (order < 0) + order = 0; + return order; +} + +static unsigned long ptr_to_pide(struct ioc *ioc, unsigned long *res_ptr, + unsigned int bitshiftcnt) +{ + return (((unsigned long)res_ptr - (unsigned long)ioc->res_map) << 3) + + bitshiftcnt; +} + +/** + * sba_search_bitmap - find free space in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @bits_wanted: number of entries we need. + * @use_hint: use res_hint to indicate where to start looking + * + * Find consecutive free bits in resource bitmap. + * Each bit represents one entry in the IO Pdir. + * Cool perf optimization: search for log2(size) bits at a time. + */ +static SBA_INLINE unsigned long +sba_search_bitmap(struct ioc *ioc, struct device *dev, + unsigned long bits_wanted, int use_hint) +{ + unsigned long *res_ptr; + unsigned long *res_end = (unsigned long *) &(ioc->res_map[ioc->res_size]); + unsigned long flags, pide = ~0UL, tpide; + unsigned long boundary_size; + unsigned long shift; + int ret; + + ASSERT(((unsigned long) ioc->res_hint & (sizeof(unsigned long) - 1UL)) == 0); + ASSERT(res_ptr < res_end); + + boundary_size = dma_get_seg_boundary_nr_pages(dev, iovp_shift); + + BUG_ON(ioc->ibase & ~iovp_mask); + shift = ioc->ibase >> iovp_shift; + + spin_lock_irqsave(&ioc->res_lock, flags); + + /* Allow caller to force a search through the entire resource space */ + if (likely(use_hint)) { + res_ptr = ioc->res_hint; + } else { + res_ptr = (ulong *)ioc->res_map; + ioc->res_bitshift = 0; + } + + /* + * N.B. REO/Grande defect AR2305 can cause TLB fetch timeouts + * if a TLB entry is purged while in use. sba_mark_invalid() + * purges IOTLB entries in power-of-two sizes, so we also + * allocate IOVA space in power-of-two sizes. + */ + bits_wanted = 1UL << get_iovp_order(bits_wanted << iovp_shift); + + if (likely(bits_wanted == 1)) { + unsigned int bitshiftcnt; + for(; res_ptr < res_end ; res_ptr++) { + if (likely(*res_ptr != ~0UL)) { + bitshiftcnt = ffz(*res_ptr); + *res_ptr |= (1UL << bitshiftcnt); + pide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; + } + } + goto not_found; + + } + + if (likely(bits_wanted <= BITS_PER_LONG/2)) { + /* + ** Search the resource bit map on well-aligned values. + ** "o" is the alignment. + ** We need the alignment to invalidate I/O TLB using + ** SBA HW features in the unmap path. + */ + unsigned long o = 1 << get_iovp_order(bits_wanted << iovp_shift); + uint bitshiftcnt = ROUNDUP(ioc->res_bitshift, o); + unsigned long mask, base_mask; + + base_mask = RESMAP_MASK(bits_wanted); + mask = base_mask << bitshiftcnt; + + DBG_RES("%s() o %ld %p", __func__, o, res_ptr); + for(; res_ptr < res_end ; res_ptr++) + { + DBG_RES(" %p %lx %lx\n", res_ptr, mask, *res_ptr); + ASSERT(0 != mask); + for (; mask ; mask <<= o, bitshiftcnt += o) { + tpide = ptr_to_pide(ioc, res_ptr, bitshiftcnt); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, + boundary_size); + if ((0 == ((*res_ptr) & mask)) && !ret) { + *res_ptr |= mask; /* mark resources busy! */ + pide = tpide; + ioc->res_bitshift = bitshiftcnt + bits_wanted; + goto found_it; + } + } + + bitshiftcnt = 0; + mask = base_mask; + + } + + } else { + int qwords, bits, i; + unsigned long *end; + + qwords = bits_wanted >> 6; /* /64 */ + bits = bits_wanted - (qwords * BITS_PER_LONG); + + end = res_end - qwords; + + for (; res_ptr < end; res_ptr++) { + tpide = ptr_to_pide(ioc, res_ptr, 0); + ret = iommu_is_span_boundary(tpide, bits_wanted, + shift, boundary_size); + if (ret) + goto next_ptr; + for (i = 0 ; i < qwords ; i++) { + if (res_ptr[i] != 0) + goto next_ptr; + } + if (bits && res_ptr[i] && (__ffs(res_ptr[i]) < bits)) + continue; + + /* Found it, mark it */ + for (i = 0 ; i < qwords ; i++) + res_ptr[i] = ~0UL; + res_ptr[i] |= RESMAP_MASK(bits); + + pide = tpide; + res_ptr += qwords; + ioc->res_bitshift = bits; + goto found_it; +next_ptr: + ; + } + } + +not_found: + prefetch(ioc->res_map); + ioc->res_hint = (unsigned long *) ioc->res_map; + ioc->res_bitshift = 0; + spin_unlock_irqrestore(&ioc->res_lock, flags); + return (pide); + +found_it: + ioc->res_hint = res_ptr; + spin_unlock_irqrestore(&ioc->res_lock, flags); + return (pide); +} + + +/** + * sba_alloc_range - find free bits and mark them in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @size: number of bytes to create a mapping for + * + * Given a size, find consecutive unmarked and then mark those bits in the + * resource bit map. + */ +static int +sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size) +{ + unsigned int pages_needed = size >> iovp_shift; +#ifdef PDIR_SEARCH_TIMING + unsigned long itc_start; +#endif + unsigned long pide; + + ASSERT(pages_needed); + ASSERT(0 == (size & ~iovp_mask)); + +#ifdef PDIR_SEARCH_TIMING + itc_start = ia64_get_itc(); +#endif + /* + ** "seek and ye shall find"...praying never hurts either... + */ + pide = sba_search_bitmap(ioc, dev, pages_needed, 1); + if (unlikely(pide >= (ioc->res_size << 3))) { + pide = sba_search_bitmap(ioc, dev, pages_needed, 0); + if (unlikely(pide >= (ioc->res_size << 3))) { +#if DELAYED_RESOURCE_CNT > 0 + unsigned long flags; + + /* + ** With delayed resource freeing, we can give this one more shot. We're + ** getting close to being in trouble here, so do what we can to make this + ** one count. + */ + spin_lock_irqsave(&ioc->saved_lock, flags); + if (ioc->saved_cnt > 0) { + struct sba_dma_pair *d; + int cnt = ioc->saved_cnt; + + d = &(ioc->saved[ioc->saved_cnt - 1]); + + spin_lock(&ioc->res_lock); + while (cnt--) { + sba_mark_invalid(ioc, d->iova, d->size); + sba_free_range(ioc, d->iova, d->size); + d--; + } + ioc->saved_cnt = 0; + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock(&ioc->res_lock); + } + spin_unlock_irqrestore(&ioc->saved_lock, flags); + + pide = sba_search_bitmap(ioc, dev, pages_needed, 0); + if (unlikely(pide >= (ioc->res_size << 3))) { + printk(KERN_WARNING "%s: I/O MMU @ %p is" + "out of mapping resources, %u %u %lx\n", + __func__, ioc->ioc_hpa, ioc->res_size, + pages_needed, dma_get_seg_boundary(dev)); + return -1; + } +#else + printk(KERN_WARNING "%s: I/O MMU @ %p is" + "out of mapping resources, %u %u %lx\n", + __func__, ioc->ioc_hpa, ioc->res_size, + pages_needed, dma_get_seg_boundary(dev)); + return -1; +#endif + } + } + +#ifdef PDIR_SEARCH_TIMING + ioc->avg_search[ioc->avg_idx++] = (ia64_get_itc() - itc_start) / pages_needed; + ioc->avg_idx &= SBA_SEARCH_SAMPLE - 1; +#endif + + prefetchw(&(ioc->pdir_base[pide])); + +#ifdef ASSERT_PDIR_SANITY + /* verify the first enable bit is clear */ + if(0x00 != ((u8 *) ioc->pdir_base)[pide*PDIR_ENTRY_SIZE + 7]) { + sba_dump_pdir_entry(ioc, "sba_search_bitmap() botched it?", pide); + } +#endif + + DBG_RES("%s(%x) %d -> %lx hint %x/%x\n", + __func__, size, pages_needed, pide, + (uint) ((unsigned long) ioc->res_hint - (unsigned long) ioc->res_map), + ioc->res_bitshift ); + + return (pide); +} + + +/** + * sba_free_range - unmark bits in IO PDIR resource bitmap + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @iova: IO virtual address which was previously allocated. + * @size: number of bytes to create a mapping for + * + * clear bits in the ioc's resource map + */ +static SBA_INLINE void +sba_free_range(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + unsigned long iovp = SBA_IOVP(ioc, iova); + unsigned int pide = PDIR_INDEX(iovp); + unsigned int ridx = pide >> 3; /* convert bit to byte address */ + unsigned long *res_ptr = (unsigned long *) &((ioc)->res_map[ridx & ~RESMAP_IDX_MASK]); + int bits_not_wanted = size >> iovp_shift; + unsigned long m; + + /* Round up to power-of-two size: see AR2305 note above */ + bits_not_wanted = 1UL << get_iovp_order(bits_not_wanted << iovp_shift); + for (; bits_not_wanted > 0 ; res_ptr++) { + + if (unlikely(bits_not_wanted > BITS_PER_LONG)) { + + /* these mappings start 64bit aligned */ + *res_ptr = 0UL; + bits_not_wanted -= BITS_PER_LONG; + pide += BITS_PER_LONG; + + } else { + + /* 3-bits "bit" address plus 2 (or 3) bits for "byte" == bit in word */ + m = RESMAP_MASK(bits_not_wanted) << (pide & (BITS_PER_LONG - 1)); + bits_not_wanted = 0; + + DBG_RES("%s( ,%x,%x) %x/%lx %x %p %lx\n", __func__, (uint) iova, size, + bits_not_wanted, m, pide, res_ptr, *res_ptr); + + ASSERT(m != 0); + ASSERT(bits_not_wanted); + ASSERT((*res_ptr & m) == m); /* verify same bits are set */ + *res_ptr &= ~m; + } + } +} + + +/************************************************************** +* +* "Dynamic DMA Mapping" support (aka "Coherent I/O") +* +***************************************************************/ + +/** + * sba_io_pdir_entry - fill in one IO PDIR entry + * @pdir_ptr: pointer to IO PDIR entry + * @vba: Virtual CPU address of buffer to map + * + * SBA Mapping Routine + * + * Given a virtual address (vba, arg1) sba_io_pdir_entry() + * loads the I/O PDIR entry pointed to by pdir_ptr (arg0). + * Each IO Pdir entry consists of 8 bytes as shown below + * (LSB == bit 0): + * + * 63 40 11 7 0 + * +-+---------------------+----------------------------------+----+--------+ + * |V| U | PPN[39:12] | U | FF | + * +-+---------------------+----------------------------------+----+--------+ + * + * V == Valid Bit + * U == Unused + * PPN == Physical Page Number + * + * The physical address fields are filled with the results of virt_to_phys() + * on the vba. + */ + +#if 1 +#define sba_io_pdir_entry(pdir_ptr, vba) *pdir_ptr = ((vba & ~0xE000000000000FFFULL) \ + | 0x8000000000000000ULL) +#else +void SBA_INLINE +sba_io_pdir_entry(u64 *pdir_ptr, unsigned long vba) +{ + *pdir_ptr = ((vba & ~0xE000000000000FFFULL) | 0x80000000000000FFULL); +} +#endif + +#ifdef ENABLE_MARK_CLEAN +/* + * Since DMA is i-cache coherent, any (complete) pages that were written via + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +static void mark_clean(void *addr, size_t size) +{ + struct folio *folio = virt_to_folio(addr); + ssize_t left = size; + size_t offset = offset_in_folio(folio, addr); + + if (offset) { + left -= folio_size(folio) - offset; + if (left <= 0) + return; + folio = folio_next(folio); + } + + while (left >= folio_size(folio)) { + left -= folio_size(folio); + set_bit(PG_arch_1, &folio->flags); + if (!left) + break; + folio = folio_next(folio); + } +} +#endif + +/** + * sba_mark_invalid - invalidate one or more IO PDIR entries + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @iova: IO Virtual Address mapped earlier + * @byte_cnt: number of bytes this mapping covers. + * + * Marking the IO PDIR entry(ies) as Invalid and invalidate + * corresponding IO TLB entry. The PCOM (Purge Command Register) + * is to purge stale entries in the IO TLB when unmapping entries. + * + * The PCOM register supports purging of multiple pages, with a minium + * of 1 page and a maximum of 2GB. Hardware requires the address be + * aligned to the size of the range being purged. The size of the range + * must be a power of 2. The "Cool perf optimization" in the + * allocation routine helps keep that true. + */ +static SBA_INLINE void +sba_mark_invalid(struct ioc *ioc, dma_addr_t iova, size_t byte_cnt) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + + int off = PDIR_INDEX(iovp); + + /* Must be non-zero and rounded up */ + ASSERT(byte_cnt > 0); + ASSERT(0 == (byte_cnt & ~iovp_mask)); + +#ifdef ASSERT_PDIR_SANITY + /* Assert first pdir entry is set */ + if (!(ioc->pdir_base[off] >> 60)) { + sba_dump_pdir_entry(ioc,"sba_mark_invalid()", PDIR_INDEX(iovp)); + } +#endif + + if (byte_cnt <= iovp_size) + { + ASSERT(off < ioc->pdir_size); + + iovp |= iovp_shift; /* set "size" field for PCOM */ + +#ifndef FULL_VALID_PDIR + /* + ** clear I/O PDIR entry "valid" bit + ** Do NOT clear the rest - save it for debugging. + ** We should only clear bits that have previously + ** been enabled. + */ + ioc->pdir_base[off] &= ~(0x80000000000000FFULL); +#else + /* + ** If we want to maintain the PDIR as valid, put in + ** the spill page so devices prefetching won't + ** cause a hard fail. + */ + ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); +#endif + } else { + u32 t = get_iovp_order(byte_cnt) + iovp_shift; + + iovp |= t; + ASSERT(t <= 31); /* 2GB! Max value of "size" field */ + + do { + /* verify this pdir entry is enabled */ + ASSERT(ioc->pdir_base[off] >> 63); +#ifndef FULL_VALID_PDIR + /* clear I/O Pdir entry "valid" bit first */ + ioc->pdir_base[off] &= ~(0x80000000000000FFULL); +#else + ioc->pdir_base[off] = (0x80000000000000FFULL | prefetch_spill_page); +#endif + off++; + byte_cnt -= iovp_size; + } while (byte_cnt > 0); + } + + WRITE_REG(iovp | ioc->ibase, ioc->ioc_hpa+IOC_PCOM); +} + +/** + * sba_map_page - map one buffer and return IOVA for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @page: page to map + * @poff: offset into page + * @size: number of bytes to map + * @dir: dma direction + * @attrs: optional dma attributes + * + * See Documentation/core-api/dma-api-howto.rst + */ +static dma_addr_t sba_map_page(struct device *dev, struct page *page, + unsigned long poff, size_t size, + enum dma_data_direction dir, + unsigned long attrs) +{ + struct ioc *ioc; + void *addr = page_address(page) + poff; + dma_addr_t iovp; + dma_addr_t offset; + u64 *pdir_start; + int pide; +#ifdef ASSERT_PDIR_SANITY + unsigned long flags; +#endif +#ifdef ALLOW_IOV_BYPASS + unsigned long pci_addr = virt_to_phys(addr); +#endif + +#ifdef ALLOW_IOV_BYPASS + ASSERT(to_pci_dev(dev)->dma_mask); + /* + ** Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if (likely((pci_addr & ~to_pci_dev(dev)->dma_mask) == 0)) { + /* + ** Device is bit capable of DMA'ing to the buffer... + ** just return the PCI address of ptr + */ + DBG_BYPASS("sba_map_page() bypass mask/addr: " + "0x%lx/0x%lx\n", + to_pci_dev(dev)->dma_mask, pci_addr); + return pci_addr; + } +#endif + ioc = GET_IOC(dev); + ASSERT(ioc); + + prefetch(ioc->res_hint); + + ASSERT(size > 0); + ASSERT(size <= DMA_CHUNK_SIZE); + + /* save offset bits */ + offset = ((dma_addr_t) (long) addr) & ~iovp_mask; + + /* round up to nearest iovp_size */ + size = (size + offset + ~iovp_mask) & iovp_mask; + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check before sba_map_page()")) + panic("Sanity check failed"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + pide = sba_alloc_range(ioc, dev, size); + if (pide < 0) + return DMA_MAPPING_ERROR; + + iovp = (dma_addr_t) pide << iovp_shift; + + DBG_RUN("%s() 0x%p -> 0x%lx\n", __func__, addr, (long) iovp | offset); + + pdir_start = &(ioc->pdir_base[pide]); + + while (size > 0) { + ASSERT(((u8 *)pdir_start)[7] == 0); /* verify availability */ + sba_io_pdir_entry(pdir_start, (unsigned long) addr); + + DBG_RUN(" pdir 0x%p %lx\n", pdir_start, *pdir_start); + + addr += iovp_size; + size -= iovp_size; + pdir_start++; + } + /* force pdir update */ + wmb(); + + /* form complete address */ +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check after sba_map_page()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + return SBA_IOVA(ioc, iovp, offset); +} + +#ifdef ENABLE_MARK_CLEAN +static SBA_INLINE void +sba_mark_clean(struct ioc *ioc, dma_addr_t iova, size_t size) +{ + u32 iovp = (u32) SBA_IOVP(ioc,iova); + int off = PDIR_INDEX(iovp); + void *addr; + + if (size <= iovp_size) { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, size); + } else { + do { + addr = phys_to_virt(ioc->pdir_base[off] & + ~0xE000000000000FFFULL); + mark_clean(addr, min(size, iovp_size)); + off++; + size -= iovp_size; + } while (size > 0); + } +} +#endif + +/** + * sba_unmap_page - unmap one IOVA and free resources + * @dev: instance of PCI owned by the driver that's asking. + * @iova: IOVA of driver buffer previously mapped. + * @size: number of bytes mapped in driver buffer. + * @dir: R/W or both. + * @attrs: optional dma attributes + * + * See Documentation/core-api/dma-api-howto.rst + */ +static void sba_unmap_page(struct device *dev, dma_addr_t iova, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ + struct ioc *ioc; +#if DELAYED_RESOURCE_CNT > 0 + struct sba_dma_pair *d; +#endif + unsigned long flags; + dma_addr_t offset; + + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef ALLOW_IOV_BYPASS + if (likely((iova & ioc->imask) != ioc->ibase)) { + /* + ** Address does not fall w/in IOVA, must be bypassing + */ + DBG_BYPASS("sba_unmap_page() bypass addr: 0x%lx\n", + iova); + +#ifdef ENABLE_MARK_CLEAN + if (dir == DMA_FROM_DEVICE) { + mark_clean(phys_to_virt(iova), size); + } +#endif + return; + } +#endif + offset = iova & ~iovp_mask; + + DBG_RUN("%s() iovp 0x%lx/%x\n", __func__, (long) iova, size); + + iova ^= offset; /* clear offset bits */ + size += offset; + size = ROUNDUP(size, iovp_size); + +#ifdef ENABLE_MARK_CLEAN + if (dir == DMA_FROM_DEVICE) + sba_mark_clean(ioc, iova, size); +#endif + +#if DELAYED_RESOURCE_CNT > 0 + spin_lock_irqsave(&ioc->saved_lock, flags); + d = &(ioc->saved[ioc->saved_cnt]); + d->iova = iova; + d->size = size; + if (unlikely(++(ioc->saved_cnt) >= DELAYED_RESOURCE_CNT)) { + int cnt = ioc->saved_cnt; + spin_lock(&ioc->res_lock); + while (cnt--) { + sba_mark_invalid(ioc, d->iova, d->size); + sba_free_range(ioc, d->iova, d->size); + d--; + } + ioc->saved_cnt = 0; + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock(&ioc->res_lock); + } + spin_unlock_irqrestore(&ioc->saved_lock, flags); +#else /* DELAYED_RESOURCE_CNT == 0 */ + spin_lock_irqsave(&ioc->res_lock, flags); + sba_mark_invalid(ioc, iova, size); + sba_free_range(ioc, iova, size); + READ_REG(ioc->ioc_hpa+IOC_PCOM); /* flush purges */ + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif /* DELAYED_RESOURCE_CNT == 0 */ +} + +/** + * sba_alloc_coherent - allocate/map shared mem for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @size: number of bytes mapped in driver buffer. + * @dma_handle: IOVA of new buffer. + * + * See Documentation/core-api/dma-api-howto.rst + */ +static void * +sba_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flags, unsigned long attrs) +{ + struct page *page; + struct ioc *ioc; + int node = -1; + void *addr; + + ioc = GET_IOC(dev); + ASSERT(ioc); +#ifdef CONFIG_NUMA + node = ioc->node; +#endif + + page = alloc_pages_node(node, flags, get_order(size)); + if (unlikely(!page)) + return NULL; + + addr = page_address(page); + memset(addr, 0, size); + *dma_handle = page_to_phys(page); + +#ifdef ALLOW_IOV_BYPASS + ASSERT(dev->coherent_dma_mask); + /* + ** Check if the PCI device can DMA to ptr... if so, just return ptr + */ + if (likely((*dma_handle & ~dev->coherent_dma_mask) == 0)) { + DBG_BYPASS("sba_alloc_coherent() bypass mask/addr: 0x%lx/0x%lx\n", + dev->coherent_dma_mask, *dma_handle); + + return addr; + } +#endif + + /* + * If device can't bypass or bypass is disabled, pass the 32bit fake + * device to map single to get an iova mapping. + */ + *dma_handle = sba_map_page(&ioc->sac_only_dev->dev, page, 0, size, + DMA_BIDIRECTIONAL, 0); + if (dma_mapping_error(dev, *dma_handle)) + return NULL; + return addr; +} + + +/** + * sba_free_coherent - free/unmap shared mem for DMA + * @dev: instance of PCI owned by the driver that's asking. + * @size: number of bytes mapped in driver buffer. + * @vaddr: virtual address IOVA of "consistent" buffer. + * @dma_handler: IO virtual address of "consistent" buffer. + * + * See Documentation/core-api/dma-api-howto.rst + */ +static void sba_free_coherent(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) +{ + sba_unmap_page(dev, dma_handle, size, 0, 0); + free_pages((unsigned long) vaddr, get_order(size)); +} + + +/* +** Since 0 is a valid pdir_base index value, can't use that +** to determine if a value is valid or not. Use a flag to indicate +** the SG list entry contains a valid pdir index. +*/ +#define PIDE_FLAG 0x1UL + +#ifdef DEBUG_LARGE_SG_ENTRIES +int dump_run_sg = 0; +#endif + + +/** + * sba_fill_pdir - write allocated SG entries into IO PDIR + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: list of IOVA/size pairs + * @nents: number of entries in startsg list + * + * Take preprocessed SG list and write corresponding entries + * in the IO PDIR. + */ + +static SBA_INLINE int +sba_fill_pdir( + struct ioc *ioc, + struct scatterlist *startsg, + int nents) +{ + struct scatterlist *dma_sg = startsg; /* pointer to current DMA */ + int n_mappings = 0; + u64 *pdirp = NULL; + unsigned long dma_offset = 0; + + while (nents-- > 0) { + int cnt = startsg->dma_length; + startsg->dma_length = 0; + +#ifdef DEBUG_LARGE_SG_ENTRIES + if (dump_run_sg) + printk(" %2d : %08lx/%05x %p\n", + nents, startsg->dma_address, cnt, + sba_sg_address(startsg)); +#else + DBG_RUN_SG(" %d : %08lx/%05x %p\n", + nents, startsg->dma_address, cnt, + sba_sg_address(startsg)); +#endif + /* + ** Look for the start of a new DMA stream + */ + if (startsg->dma_address & PIDE_FLAG) { + u32 pide = startsg->dma_address & ~PIDE_FLAG; + dma_offset = (unsigned long) pide & ~iovp_mask; + startsg->dma_address = 0; + if (n_mappings) + dma_sg = sg_next(dma_sg); + dma_sg->dma_address = pide | ioc->ibase; + pdirp = &(ioc->pdir_base[pide >> iovp_shift]); + n_mappings++; + } + + /* + ** Look for a VCONTIG chunk + */ + if (cnt) { + unsigned long vaddr = (unsigned long) sba_sg_address(startsg); + ASSERT(pdirp); + + /* Since multiple Vcontig blocks could make up + ** one DMA stream, *add* cnt to dma_len. + */ + dma_sg->dma_length += cnt; + cnt += dma_offset; + dma_offset=0; /* only want offset on first chunk */ + cnt = ROUNDUP(cnt, iovp_size); + do { + sba_io_pdir_entry(pdirp, vaddr); + vaddr += iovp_size; + cnt -= iovp_size; + pdirp++; + } while (cnt > 0); + } + startsg = sg_next(startsg); + } + /* force pdir update */ + wmb(); + +#ifdef DEBUG_LARGE_SG_ENTRIES + dump_run_sg = 0; +#endif + return(n_mappings); +} + + +/* +** Two address ranges are DMA contiguous *iff* "end of prev" and +** "start of next" are both on an IOV page boundary. +** +** (shift left is a quick trick to mask off upper bits) +*/ +#define DMA_CONTIG(__X, __Y) \ + (((((unsigned long) __X) | ((unsigned long) __Y)) << (BITS_PER_LONG - iovp_shift)) == 0UL) + + +/** + * sba_coalesce_chunks - preprocess the SG list + * @ioc: IO MMU structure which owns the pdir we are interested in. + * @startsg: list of IOVA/size pairs + * @nents: number of entries in startsg list + * + * First pass is to walk the SG list and determine where the breaks are + * in the DMA stream. Allocates PDIR entries but does not fill them. + * Returns the number of DMA chunks. + * + * Doing the fill separate from the coalescing/allocation keeps the + * code simpler. Future enhancement could make one pass through + * the sglist do both. + */ +static SBA_INLINE int +sba_coalesce_chunks(struct ioc *ioc, struct device *dev, + struct scatterlist *startsg, + int nents) +{ + struct scatterlist *vcontig_sg; /* VCONTIG chunk head */ + unsigned long vcontig_len; /* len of VCONTIG chunk */ + unsigned long vcontig_end; + struct scatterlist *dma_sg; /* next DMA stream head */ + unsigned long dma_offset, dma_len; /* start/len of DMA stream */ + int n_mappings = 0; + unsigned int max_seg_size = dma_get_max_seg_size(dev); + int idx; + + while (nents > 0) { + unsigned long vaddr = (unsigned long) sba_sg_address(startsg); + + /* + ** Prepare for first/next DMA stream + */ + dma_sg = vcontig_sg = startsg; + dma_len = vcontig_len = vcontig_end = startsg->length; + vcontig_end += vaddr; + dma_offset = vaddr & ~iovp_mask; + + /* PARANOID: clear entries */ + startsg->dma_address = startsg->dma_length = 0; + + /* + ** This loop terminates one iteration "early" since + ** it's always looking one "ahead". + */ + while (--nents > 0) { + unsigned long vaddr; /* tmp */ + + startsg = sg_next(startsg); + + /* PARANOID */ + startsg->dma_address = startsg->dma_length = 0; + + /* catch brokenness in SCSI layer */ + ASSERT(startsg->length <= DMA_CHUNK_SIZE); + + /* + ** First make sure current dma stream won't + ** exceed DMA_CHUNK_SIZE if we coalesce the + ** next entry. + */ + if (((dma_len + dma_offset + startsg->length + ~iovp_mask) & iovp_mask) + > DMA_CHUNK_SIZE) + break; + + if (dma_len + startsg->length > max_seg_size) + break; + + /* + ** Then look for virtually contiguous blocks. + ** + ** append the next transaction? + */ + vaddr = (unsigned long) sba_sg_address(startsg); + if (vcontig_end == vaddr) + { + vcontig_len += startsg->length; + vcontig_end += startsg->length; + dma_len += startsg->length; + continue; + } + +#ifdef DEBUG_LARGE_SG_ENTRIES + dump_run_sg = (vcontig_len > iovp_size); +#endif + + /* + ** Not virtually contiguous. + ** Terminate prev chunk. + ** Start a new chunk. + ** + ** Once we start a new VCONTIG chunk, dma_offset + ** can't change. And we need the offset from the first + ** chunk - not the last one. Ergo Successive chunks + ** must start on page boundaries and dove tail + ** with it's predecessor. + */ + vcontig_sg->dma_length = vcontig_len; + + vcontig_sg = startsg; + vcontig_len = startsg->length; + + /* + ** 3) do the entries end/start on page boundaries? + ** Don't update vcontig_end until we've checked. + */ + if (DMA_CONTIG(vcontig_end, vaddr)) + { + vcontig_end = vcontig_len + vaddr; + dma_len += vcontig_len; + continue; + } else { + break; + } + } + + /* + ** End of DMA Stream + ** Terminate last VCONTIG block. + ** Allocate space for DMA stream. + */ + vcontig_sg->dma_length = vcontig_len; + dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask; + ASSERT(dma_len <= DMA_CHUNK_SIZE); + idx = sba_alloc_range(ioc, dev, dma_len); + if (idx < 0) { + dma_sg->dma_length = 0; + return -1; + } + dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG | (idx << iovp_shift) + | dma_offset); + n_mappings++; + } + + return n_mappings; +} + +static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction dir, + unsigned long attrs); +/** + * sba_map_sg - map Scatter/Gather list + * @dev: instance of PCI owned by the driver that's asking. + * @sglist: array of buffer/length pairs + * @nents: number of entries in list + * @dir: R/W or both. + * @attrs: optional dma attributes + * + * See Documentation/core-api/dma-api-howto.rst + */ +static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ + struct ioc *ioc; + int coalesced, filled = 0; +#ifdef ASSERT_PDIR_SANITY + unsigned long flags; +#endif +#ifdef ALLOW_IOV_BYPASS_SG + struct scatterlist *sg; +#endif + + DBG_RUN_SG("%s() START %d entries\n", __func__, nents); + ioc = GET_IOC(dev); + ASSERT(ioc); + +#ifdef ALLOW_IOV_BYPASS_SG + ASSERT(to_pci_dev(dev)->dma_mask); + if (likely((ioc->dma_mask & ~to_pci_dev(dev)->dma_mask) == 0)) { + for_each_sg(sglist, sg, nents, filled) { + sg->dma_length = sg->length; + sg->dma_address = virt_to_phys(sba_sg_address(sg)); + } + return filled; + } +#endif + /* Fast path single entry scatterlists. */ + if (nents == 1) { + sglist->dma_length = sglist->length; + sglist->dma_address = sba_map_page(dev, sg_page(sglist), + sglist->offset, sglist->length, dir, attrs); + if (dma_mapping_error(dev, sglist->dma_address)) + return -EIO; + return 1; + } + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check before sba_map_sg_attrs()")) + { + sba_dump_sg(ioc, sglist, nents); + panic("Check before sba_map_sg_attrs()"); + } + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + prefetch(ioc->res_hint); + + /* + ** First coalesce the chunks and allocate I/O pdir space + ** + ** If this is one DMA stream, we can properly map using the + ** correct virtual address associated with each DMA page. + ** w/o this association, we wouldn't have coherent DMA! + ** Access to the virtual address is what forces a two pass algorithm. + */ + coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents); + if (coalesced < 0) { + sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs); + return -ENOMEM; + } + + /* + ** Program the I/O Pdir + ** + ** map the virtual addresses to the I/O Pdir + ** o dma_address will contain the pdir index + ** o dma_len will contain the number of bytes to map + ** o address contains the virtual address. + */ + filled = sba_fill_pdir(ioc, sglist, nents); + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + if (sba_check_pdir(ioc,"Check after sba_map_sg_attrs()")) + { + sba_dump_sg(ioc, sglist, nents); + panic("Check after sba_map_sg_attrs()\n"); + } + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + ASSERT(coalesced == filled); + DBG_RUN_SG("%s() DONE %d mappings\n", __func__, filled); + + return filled; +} + +/** + * sba_unmap_sg_attrs - unmap Scatter/Gather list + * @dev: instance of PCI owned by the driver that's asking. + * @sglist: array of buffer/length pairs + * @nents: number of entries in list + * @dir: R/W or both. + * @attrs: optional dma attributes + * + * See Documentation/core-api/dma-api-howto.rst + */ +static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist, + int nents, enum dma_data_direction dir, + unsigned long attrs) +{ +#ifdef ASSERT_PDIR_SANITY + struct ioc *ioc; + unsigned long flags; +#endif + + DBG_RUN_SG("%s() START %d entries, %p,%x\n", + __func__, nents, sba_sg_address(sglist), sglist->length); + +#ifdef ASSERT_PDIR_SANITY + ioc = GET_IOC(dev); + ASSERT(ioc); + + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check before sba_unmap_sg_attrs()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + + while (nents && sglist->dma_length) { + + sba_unmap_page(dev, sglist->dma_address, sglist->dma_length, + dir, attrs); + sglist = sg_next(sglist); + nents--; + } + + DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); + +#ifdef ASSERT_PDIR_SANITY + spin_lock_irqsave(&ioc->res_lock, flags); + sba_check_pdir(ioc,"Check after sba_unmap_sg_attrs()"); + spin_unlock_irqrestore(&ioc->res_lock, flags); +#endif + +} + +/************************************************************** +* +* Initialization and claim +* +***************************************************************/ + +static void +ioc_iova_init(struct ioc *ioc) +{ + int tcnfg; + int agp_found = 0; + struct pci_dev *device = NULL; +#ifdef FULL_VALID_PDIR + unsigned long index; +#endif + + /* + ** Firmware programs the base and size of a "safe IOVA space" + ** (one that doesn't overlap memory or LMMIO space) in the + ** IBASE and IMASK registers. + */ + ioc->ibase = READ_REG(ioc->ioc_hpa + IOC_IBASE) & ~0x1UL; + ioc->imask = READ_REG(ioc->ioc_hpa + IOC_IMASK) | 0xFFFFFFFF00000000UL; + + ioc->iov_size = ~ioc->imask + 1; + + DBG_INIT("%s() hpa %p IOV base 0x%lx mask 0x%lx (%dMB)\n", + __func__, ioc->ioc_hpa, ioc->ibase, ioc->imask, + ioc->iov_size >> 20); + + switch (iovp_size) { + case 4*1024: tcnfg = 0; break; + case 8*1024: tcnfg = 1; break; + case 16*1024: tcnfg = 2; break; + case 64*1024: tcnfg = 3; break; + default: + panic(PFX "Unsupported IOTLB page size %ldK", + iovp_size >> 10); + break; + } + WRITE_REG(tcnfg, ioc->ioc_hpa + IOC_TCNFG); + + ioc->pdir_size = (ioc->iov_size / iovp_size) * PDIR_ENTRY_SIZE; + ioc->pdir_base = (void *) __get_free_pages(GFP_KERNEL, + get_order(ioc->pdir_size)); + if (!ioc->pdir_base) + panic(PFX "Couldn't allocate I/O Page Table\n"); + + memset(ioc->pdir_base, 0, ioc->pdir_size); + + DBG_INIT("%s() IOV page size %ldK pdir %p size %x\n", __func__, + iovp_size >> 10, ioc->pdir_base, ioc->pdir_size); + + ASSERT(ALIGN((unsigned long) ioc->pdir_base, 4*1024) == (unsigned long) ioc->pdir_base); + WRITE_REG(virt_to_phys(ioc->pdir_base), ioc->ioc_hpa + IOC_PDIR_BASE); + + /* + ** If an AGP device is present, only use half of the IOV space + ** for PCI DMA. Unfortunately we can't know ahead of time + ** whether GART support will actually be used, for now we + ** can just key on an AGP device found in the system. + ** We program the next pdir index after we stop w/ a key for + ** the GART code to handshake on. + */ + for_each_pci_dev(device) + agp_found |= pci_find_capability(device, PCI_CAP_ID_AGP); + + if (agp_found && reserve_sba_gart) { + printk(KERN_INFO PFX "reserving %dMb of IOVA space at 0x%lx for agpgart\n", + ioc->iov_size/2 >> 20, ioc->ibase + ioc->iov_size/2); + ioc->pdir_size /= 2; + ((u64 *)ioc->pdir_base)[PDIR_INDEX(ioc->iov_size/2)] = ZX1_SBA_IOMMU_COOKIE; + } +#ifdef FULL_VALID_PDIR + /* + ** Check to see if the spill page has been allocated, we don't need more than + ** one across multiple SBAs. + */ + if (!prefetch_spill_page) { + char *spill_poison = "SBAIOMMU POISON"; + int poison_size = 16; + void *poison_addr, *addr; + + addr = (void *)__get_free_pages(GFP_KERNEL, get_order(iovp_size)); + if (!addr) + panic(PFX "Couldn't allocate PDIR spill page\n"); + + poison_addr = addr; + for ( ; (u64) poison_addr < addr + iovp_size; poison_addr += poison_size) + memcpy(poison_addr, spill_poison, poison_size); + + prefetch_spill_page = virt_to_phys(addr); + + DBG_INIT("%s() prefetch spill addr: 0x%lx\n", __func__, prefetch_spill_page); + } + /* + ** Set all the PDIR entries valid w/ the spill page as the target + */ + for (index = 0 ; index < (ioc->pdir_size / PDIR_ENTRY_SIZE) ; index++) + ((u64 *)ioc->pdir_base)[index] = (0x80000000000000FF | prefetch_spill_page); +#endif + + /* Clear I/O TLB of any possible entries */ + WRITE_REG(ioc->ibase | (get_iovp_order(ioc->iov_size) + iovp_shift), ioc->ioc_hpa + IOC_PCOM); + READ_REG(ioc->ioc_hpa + IOC_PCOM); + + /* Enable IOVA translation */ + WRITE_REG(ioc->ibase | 1, ioc->ioc_hpa + IOC_IBASE); + READ_REG(ioc->ioc_hpa + IOC_IBASE); +} + +static void __init +ioc_resource_init(struct ioc *ioc) +{ + spin_lock_init(&ioc->res_lock); +#if DELAYED_RESOURCE_CNT > 0 + spin_lock_init(&ioc->saved_lock); +#endif + + /* resource map size dictated by pdir_size */ + ioc->res_size = ioc->pdir_size / PDIR_ENTRY_SIZE; /* entries */ + ioc->res_size >>= 3; /* convert bit count to byte count */ + DBG_INIT("%s() res_size 0x%x\n", __func__, ioc->res_size); + + ioc->res_map = (char *) __get_free_pages(GFP_KERNEL, + get_order(ioc->res_size)); + if (!ioc->res_map) + panic(PFX "Couldn't allocate resource map\n"); + + memset(ioc->res_map, 0, ioc->res_size); + /* next available IOVP - circular search */ + ioc->res_hint = (unsigned long *) ioc->res_map; + +#ifdef ASSERT_PDIR_SANITY + /* Mark first bit busy - ie no IOVA 0 */ + ioc->res_map[0] = 0x1; + ioc->pdir_base[0] = 0x8000000000000000ULL | ZX1_SBA_IOMMU_COOKIE; +#endif +#ifdef FULL_VALID_PDIR + /* Mark the last resource used so we don't prefetch beyond IOVA space */ + ioc->res_map[ioc->res_size - 1] |= 0x80UL; /* res_map is chars */ + ioc->pdir_base[(ioc->pdir_size / PDIR_ENTRY_SIZE) - 1] = (0x80000000000000FF + | prefetch_spill_page); +#endif + + DBG_INIT("%s() res_map %x %p\n", __func__, + ioc->res_size, (void *) ioc->res_map); +} + +static void __init +ioc_sac_init(struct ioc *ioc) +{ + struct pci_dev *sac = NULL; + struct pci_controller *controller = NULL; + + /* + * pci_alloc_coherent() must return a DMA address which is + * SAC (single address cycle) addressable, so allocate a + * pseudo-device to enforce that. + */ + sac = kzalloc(sizeof(*sac), GFP_KERNEL); + if (!sac) + panic(PFX "Couldn't allocate struct pci_dev"); + + controller = kzalloc(sizeof(*controller), GFP_KERNEL); + if (!controller) + panic(PFX "Couldn't allocate struct pci_controller"); + + controller->iommu = ioc; + sac->sysdata = controller; + sac->dma_mask = 0xFFFFFFFFUL; + sac->dev.bus = &pci_bus_type; + ioc->sac_only_dev = sac; +} + +static void __init +ioc_zx1_init(struct ioc *ioc) +{ + unsigned long rope_config; + unsigned int i; + + if (ioc->rev < 0x20) + panic(PFX "IOC 2.0 or later required for IOMMU support\n"); + + /* 38 bit memory controller + extra bit for range displaced by MMIO */ + ioc->dma_mask = (0x1UL << 39) - 1; + + /* + ** Clear ROPE(N)_CONFIG AO bit. + ** Disables "NT Ordering" (~= !"Relaxed Ordering") + ** Overrides bit 1 in DMA Hint Sets. + ** Improves netperf UDP_STREAM by ~10% for tg3 on bcm5701. + */ + for (i=0; i<(8*8); i+=8) { + rope_config = READ_REG(ioc->ioc_hpa + IOC_ROPE0_CFG + i); + rope_config &= ~IOC_ROPE_AO; + WRITE_REG(rope_config, ioc->ioc_hpa + IOC_ROPE0_CFG + i); + } +} + +typedef void (initfunc)(struct ioc *); + +struct ioc_iommu { + u32 func_id; + char *name; + initfunc *init; +}; + +static struct ioc_iommu ioc_iommu_info[] __initdata = { + { ZX1_IOC_ID, "zx1", ioc_zx1_init }, + { ZX2_IOC_ID, "zx2", NULL }, + { SX1000_IOC_ID, "sx1000", NULL }, + { SX2000_IOC_ID, "sx2000", NULL }, +}; + +static void __init ioc_init(unsigned long hpa, struct ioc *ioc) +{ + struct ioc_iommu *info; + + ioc->next = ioc_list; + ioc_list = ioc; + + ioc->ioc_hpa = ioremap(hpa, 0x1000); + + ioc->func_id = READ_REG(ioc->ioc_hpa + IOC_FUNC_ID); + ioc->rev = READ_REG(ioc->ioc_hpa + IOC_FCLASS) & 0xFFUL; + ioc->dma_mask = 0xFFFFFFFFFFFFFFFFUL; /* conservative */ + + for (info = ioc_iommu_info; info < ioc_iommu_info + ARRAY_SIZE(ioc_iommu_info); info++) { + if (ioc->func_id == info->func_id) { + ioc->name = info->name; + if (info->init) + (info->init)(ioc); + } + } + + iovp_size = (1 << iovp_shift); + iovp_mask = ~(iovp_size - 1); + + DBG_INIT("%s: PAGE_SIZE %ldK, iovp_size %ldK\n", __func__, + PAGE_SIZE >> 10, iovp_size >> 10); + + if (!ioc->name) { + ioc->name = kmalloc(24, GFP_KERNEL); + if (ioc->name) + sprintf((char *) ioc->name, "Unknown (%04x:%04x)", + ioc->func_id & 0xFFFF, (ioc->func_id >> 16) & 0xFFFF); + else + ioc->name = "Unknown"; + } + + ioc_iova_init(ioc); + ioc_resource_init(ioc); + ioc_sac_init(ioc); + + printk(KERN_INFO PFX + "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", + ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, + hpa, ioc->iov_size >> 20, ioc->ibase); +} + + + +/************************************************************************** +** +** SBA initialization code (HW and SW) +** +** o identify SBA chip itself +** o FIXME: initialize DMA hints for reasonable defaults +** +**************************************************************************/ + +#ifdef CONFIG_PROC_FS +static void * +ioc_start(struct seq_file *s, loff_t *pos) +{ + struct ioc *ioc; + loff_t n = *pos; + + for (ioc = ioc_list; ioc; ioc = ioc->next) + if (!n--) + return ioc; + + return NULL; +} + +static void * +ioc_next(struct seq_file *s, void *v, loff_t *pos) +{ + struct ioc *ioc = v; + + ++*pos; + return ioc->next; +} + +static void +ioc_stop(struct seq_file *s, void *v) +{ +} + +static int +ioc_show(struct seq_file *s, void *v) +{ + struct ioc *ioc = v; + unsigned long *res_ptr = (unsigned long *)ioc->res_map; + int i, used = 0; + + seq_printf(s, "Hewlett Packard %s IOC rev %d.%d\n", + ioc->name, ((ioc->rev >> 4) & 0xF), (ioc->rev & 0xF)); +#ifdef CONFIG_NUMA + if (ioc->node != NUMA_NO_NODE) + seq_printf(s, "NUMA node : %d\n", ioc->node); +#endif + seq_printf(s, "IOVA size : %ld MB\n", ((ioc->pdir_size >> 3) * iovp_size)/(1024*1024)); + seq_printf(s, "IOVA page size : %ld kb\n", iovp_size/1024); + + for (i = 0; i < (ioc->res_size / sizeof(unsigned long)); ++i, ++res_ptr) + used += hweight64(*res_ptr); + + seq_printf(s, "PDIR size : %d entries\n", ioc->pdir_size >> 3); + seq_printf(s, "PDIR used : %d entries\n", used); + +#ifdef PDIR_SEARCH_TIMING + { + unsigned long i = 0, avg = 0, min, max; + min = max = ioc->avg_search[0]; + for (i = 0; i < SBA_SEARCH_SAMPLE; i++) { + avg += ioc->avg_search[i]; + if (ioc->avg_search[i] > max) max = ioc->avg_search[i]; + if (ioc->avg_search[i] < min) min = ioc->avg_search[i]; + } + avg /= SBA_SEARCH_SAMPLE; + seq_printf(s, "Bitmap search : %ld/%ld/%ld (min/avg/max CPU Cycles/IOVA page)\n", + min, avg, max); + } +#endif +#ifndef ALLOW_IOV_BYPASS + seq_printf(s, "IOVA bypass disabled\n"); +#endif + return 0; +} + +static const struct seq_operations ioc_seq_ops = { + .start = ioc_start, + .next = ioc_next, + .stop = ioc_stop, + .show = ioc_show +}; + +static void __init +ioc_proc_init(void) +{ + struct proc_dir_entry *dir; + + dir = proc_mkdir("bus/mckinley", NULL); + if (!dir) + return; + + proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops); +} +#endif + +static void +sba_connect_bus(struct pci_bus *bus) +{ + acpi_handle handle, parent; + acpi_status status; + struct ioc *ioc; + + if (!PCI_CONTROLLER(bus)) + panic(PFX "no sysdata on bus %d!\n", bus->number); + + if (PCI_CONTROLLER(bus)->iommu) + return; + + handle = acpi_device_handle(PCI_CONTROLLER(bus)->companion); + if (!handle) + return; + + /* + * The IOC scope encloses PCI root bridges in the ACPI + * namespace, so work our way out until we find an IOC we + * claimed previously. + */ + do { + for (ioc = ioc_list; ioc; ioc = ioc->next) + if (ioc->handle == handle) { + PCI_CONTROLLER(bus)->iommu = ioc; + return; + } + + status = acpi_get_parent(handle, &parent); + handle = parent; + } while (ACPI_SUCCESS(status)); + + printk(KERN_WARNING "No IOC for PCI Bus %04x:%02x in ACPI\n", pci_domain_nr(bus), bus->number); +} + +static void __init +sba_map_ioc_to_node(struct ioc *ioc, acpi_handle handle) +{ +#ifdef CONFIG_NUMA + unsigned int node; + + node = acpi_get_node(handle); + if (node != NUMA_NO_NODE && !node_online(node)) + node = NUMA_NO_NODE; + + ioc->node = node; +#endif +} + +static void __init acpi_sba_ioc_add(struct ioc *ioc) +{ + acpi_handle handle = ioc->handle; + acpi_status status; + u64 hpa, length; + struct acpi_device_info *adi; + + ioc_found = ioc->next; + status = hp_acpi_csr_space(handle, &hpa, &length); + if (ACPI_FAILURE(status)) + goto err; + + status = acpi_get_object_info(handle, &adi); + if (ACPI_FAILURE(status)) + goto err; + + /* + * For HWP0001, only SBA appears in ACPI namespace. It encloses the PCI + * root bridges, and its CSR space includes the IOC function. + */ + if (strncmp("HWP0001", adi->hardware_id.string, 7) == 0) { + hpa += ZX1_IOC_OFFSET; + /* zx1 based systems default to kernel page size iommu pages */ + if (!iovp_shift) + iovp_shift = min(PAGE_SHIFT, 16); + } + kfree(adi); + + /* + * default anything not caught above or specified on cmdline to 4k + * iommu page size + */ + if (!iovp_shift) + iovp_shift = 12; + + ioc_init(hpa, ioc); + /* setup NUMA node association */ + sba_map_ioc_to_node(ioc, handle); + return; + + err: + kfree(ioc); +} + +static const struct acpi_device_id hp_ioc_iommu_device_ids[] = { + {"HWP0001", 0}, + {"HWP0004", 0}, + {"", 0}, +}; + +static int acpi_sba_ioc_attach(struct acpi_device *device, + const struct acpi_device_id *not_used) +{ + struct ioc *ioc; + + ioc = kzalloc(sizeof(*ioc), GFP_KERNEL); + if (!ioc) + return -ENOMEM; + + ioc->next = ioc_found; + ioc_found = ioc; + ioc->handle = device->handle; + return 1; +} + + +static struct acpi_scan_handler acpi_sba_ioc_handler = { + .ids = hp_ioc_iommu_device_ids, + .attach = acpi_sba_ioc_attach, +}; + +static int __init acpi_sba_ioc_init_acpi(void) +{ + return acpi_scan_add_handler(&acpi_sba_ioc_handler); +} +/* This has to run before acpi_scan_init(). */ +arch_initcall(acpi_sba_ioc_init_acpi); + +static int sba_dma_supported (struct device *dev, u64 mask) +{ + /* make sure it's at least 32bit capable */ + return ((mask & 0xFFFFFFFFUL) == 0xFFFFFFFFUL); +} + +static const struct dma_map_ops sba_dma_ops = { + .alloc = sba_alloc_coherent, + .free = sba_free_coherent, + .map_page = sba_map_page, + .unmap_page = sba_unmap_page, + .map_sg = sba_map_sg_attrs, + .unmap_sg = sba_unmap_sg_attrs, + .dma_supported = sba_dma_supported, + .mmap = dma_common_mmap, + .get_sgtable = dma_common_get_sgtable, + .alloc_pages = dma_common_alloc_pages, + .free_pages = dma_common_free_pages, +}; + +static int __init +sba_init(void) +{ + /* + * If we are booting a kdump kernel, the sba_iommu will cause devices + * that were not shutdown properly to MCA as soon as they are turned + * back on. Our only option for a successful kdump kernel boot is to + * use swiotlb. + */ + if (is_kdump_kernel()) + return 0; + + /* + * ioc_found should be populated by the acpi_sba_ioc_handler's .attach() + * routine, but that only happens if acpi_scan_init() has already run. + */ + while (ioc_found) + acpi_sba_ioc_add(ioc_found); + + if (!ioc_list) + return 0; + + { + struct pci_bus *b = NULL; + while ((b = pci_find_next_bus(b)) != NULL) + sba_connect_bus(b); + } + + /* no need for swiotlb with the iommu */ + swiotlb_exit(); + dma_ops = &sba_dma_ops; + +#ifdef CONFIG_PROC_FS + ioc_proc_init(); +#endif + return 0; +} + +subsys_initcall(sba_init); /* must be initialized after ACPI etc., but before any drivers... */ + +static int __init +nosbagart(char *str) +{ + reserve_sba_gart = 0; + return 1; +} + +__setup("nosbagart", nosbagart); + +static int __init +sba_page_override(char *str) +{ + unsigned long page_size; + + page_size = memparse(str, &str); + switch (page_size) { + case 4096: + case 8192: + case 16384: + case 65536: + iovp_shift = ffs(page_size) - 1; + break; + default: + printk("%s: unknown/unsupported iommu page size %ld\n", + __func__, page_size); + } + + return 1; +} + +__setup("sbapagesize=",sba_page_override); diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild new file mode 100644 index 000000000000..aefae2efde9f --- /dev/null +++ b/arch/ia64/include/asm/Kbuild @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +generated-y += syscall_table.h +generic-y += agp.h +generic-y += kvm_para.h +generic-y += mcs_spinlock.h +generic-y += vtime.h diff --git a/arch/ia64/include/asm/acenv.h b/arch/ia64/include/asm/acenv.h new file mode 100644 index 000000000000..9d673cd4c2ad --- /dev/null +++ b/arch/ia64/include/asm/acenv.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * IA64 specific ACPICA environments and implementation + * + * Copyright (C) 2014, Intel Corporation + * Author: Lv Zheng + */ + +#ifndef _ASM_IA64_ACENV_H +#define _ASM_IA64_ACENV_H + +#include + +#define COMPILER_DEPENDENT_INT64 long +#define COMPILER_DEPENDENT_UINT64 unsigned long + +/* Asm macros */ + +static inline int +ia64_acpi_acquire_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = (((old & ~0x3) + 2) + ((old >> 1) & 0x1)); + val = ia64_cmpxchg4_acq(lock, new, old); + } while (unlikely (val != old)); + return (new < 3) ? -1 : 0; +} + +static inline int +ia64_acpi_release_global_lock(unsigned int *lock) +{ + unsigned int old, new, val; + do { + old = *lock; + new = old & ~0x3; + val = ia64_cmpxchg4_acq(lock, new, old); + } while (unlikely (val != old)); + return old & 0x1; +} + +#define ACPI_ACQUIRE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = ia64_acpi_acquire_global_lock(&facs->global_lock)) + +#define ACPI_RELEASE_GLOBAL_LOCK(facs, Acq) \ + ((Acq) = ia64_acpi_release_global_lock(&facs->global_lock)) + +#endif /* _ASM_IA64_ACENV_H */ diff --git a/arch/ia64/include/asm/acpi-ext.h b/arch/ia64/include/asm/acpi-ext.h new file mode 100644 index 000000000000..eaa57583d151 --- /dev/null +++ b/arch/ia64/include/asm/acpi-ext.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + * Bjorn Helgaas + * + * Vendor specific extensions to ACPI. + */ + +#ifndef _ASM_IA64_ACPI_EXT_H +#define _ASM_IA64_ACPI_EXT_H + +#include + +extern acpi_status hp_acpi_csr_space (acpi_handle, u64 *base, u64 *length); + +#endif /* _ASM_IA64_ACPI_EXT_H */ diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h new file mode 100644 index 000000000000..58500a964238 --- /dev/null +++ b/arch/ia64/include/asm/acpi.h @@ -0,0 +1,110 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 2000,2001 J.I. Lee + * Copyright (C) 2001,2002 Paul Diefenbaugh + */ + +#ifndef _ASM_ACPI_H +#define _ASM_ACPI_H + +#ifdef __KERNEL__ + +#include + +#include +#include +#include + + +extern int acpi_lapic; +#define acpi_disabled 0 /* ACPI always enabled on IA64 */ +#define acpi_noirq 0 /* ACPI always enabled on IA64 */ +#define acpi_pci_disabled 0 /* ACPI PCI always enabled on IA64 */ +#define acpi_strict 1 /* no ACPI spec workarounds on IA64 */ + +static inline bool acpi_has_cpu_in_madt(void) +{ + return !!acpi_lapic; +} + +#define acpi_processor_cstate_check(x) (x) /* no idle limits on IA64 :) */ +static inline void disable_acpi(void) { } + +int acpi_request_vector (u32 int_type); +int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); + +/* Low-level suspend routine. */ +extern int acpi_suspend_lowlevel(void); + +static inline unsigned long acpi_get_wakeup_address(void) +{ + return 0; +} + +/* + * Record the cpei override flag and current logical cpu. This is + * useful for CPU removal. + */ +extern unsigned int can_cpei_retarget(void); +extern unsigned int is_cpu_cpei_target(unsigned int cpu); +extern void set_cpei_target_cpu(unsigned int cpu); +extern unsigned int get_cpei_target_cpu(void); +extern void prefill_possible_map(void); +#ifdef CONFIG_ACPI_HOTPLUG_CPU +extern int additional_cpus; +#else +#define additional_cpus 0 +#endif + +#ifdef CONFIG_ACPI_NUMA +#if MAX_NUMNODES > 256 +#define MAX_PXM_DOMAINS MAX_NUMNODES +#else +#define MAX_PXM_DOMAINS (256) +#endif +extern int pxm_to_nid_map[MAX_PXM_DOMAINS]; +extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; +#endif + +static inline bool arch_has_acpi_pdc(void) { return true; } +static inline void arch_acpi_set_proc_cap_bits(u32 *cap) +{ + *cap |= ACPI_PROC_CAP_EST_CAPABILITY_SMP; +} + +#ifdef CONFIG_ACPI_NUMA +extern cpumask_t early_cpu_possible_map; +#define for_each_possible_early_cpu(cpu) \ + for_each_cpu((cpu), &early_cpu_possible_map) + +static inline void per_cpu_scan_finalize(int min_cpus, int reserve_cpus) +{ + int low_cpu, high_cpu; + int cpu; + int next_nid = 0; + + low_cpu = cpumask_weight(&early_cpu_possible_map); + + high_cpu = max(low_cpu, min_cpus); + high_cpu = min(high_cpu + reserve_cpus, NR_CPUS); + + for (cpu = low_cpu; cpu < high_cpu; cpu++) { + cpumask_set_cpu(cpu, &early_cpu_possible_map); + if (node_cpuid[cpu].nid == NUMA_NO_NODE) { + node_cpuid[cpu].nid = next_nid; + next_nid++; + if (next_nid >= num_online_nodes()) + next_nid = 0; + } + } +} + +extern void acpi_numa_fixup(void); + +#endif /* CONFIG_ACPI_NUMA */ + +#endif /*__KERNEL__*/ + +#endif /*_ASM_ACPI_H*/ diff --git a/arch/ia64/include/asm/asm-offsets.h b/arch/ia64/include/asm/asm-offsets.h new file mode 100644 index 000000000000..d370ee36a182 --- /dev/null +++ b/arch/ia64/include/asm/asm-offsets.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/asm-prototypes.h b/arch/ia64/include/asm/asm-prototypes.h new file mode 100644 index 000000000000..a96689447a74 --- /dev/null +++ b/arch/ia64/include/asm/asm-prototypes.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_ASM_PROTOTYPES_H +#define _ASM_IA64_ASM_PROTOTYPES_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern const char ia64_ivt[]; + +signed int __divsi3(signed int, unsigned int); +signed int __modsi3(signed int, unsigned int); + +signed long long __divdi3(signed long long, unsigned long long); +signed long long __moddi3(signed long long, unsigned long long); + +unsigned int __udivsi3(unsigned int, unsigned int); +unsigned int __umodsi3(unsigned int, unsigned int); + +unsigned long long __udivdi3(unsigned long long, unsigned long long); +unsigned long long __umoddi3(unsigned long long, unsigned long long); + +#endif /* _ASM_IA64_ASM_PROTOTYPES_H */ diff --git a/arch/ia64/include/asm/asmmacro.h b/arch/ia64/include/asm/asmmacro.h new file mode 100644 index 000000000000..52619c517f09 --- /dev/null +++ b/arch/ia64/include/asm/asmmacro.h @@ -0,0 +1,136 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_ASMMACRO_H +#define _ASM_IA64_ASMMACRO_H + +/* + * Copyright (C) 2000-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#define ENTRY(name) \ + .align 32; \ + .proc name; \ +name: + +#define ENTRY_MIN_ALIGN(name) \ + .align 16; \ + .proc name; \ +name: + +#define GLOBAL_ENTRY(name) \ + .global name; \ + ENTRY(name) + +#define END(name) \ + .endp name + +/* + * Helper macros to make unwind directives more readable: + */ + +/* prologue_gr: */ +#define ASM_UNW_PRLG_RP 0x8 +#define ASM_UNW_PRLG_PFS 0x4 +#define ASM_UNW_PRLG_PSP 0x2 +#define ASM_UNW_PRLG_PR 0x1 +#define ASM_UNW_PRLG_GRSAVE(ninputs) (32+(ninputs)) + +/* + * Helper macros for accessing user memory. + * + * When adding any new .section/.previous entries here, make sure to + * also add it to the DISCARD section in arch/ia64/kernel/gate.lds.S or + * unpleasant things will happen. + */ + + .section "__ex_table", "a" // declare section & section attributes + .previous + +# define EX(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.; \ + [99:] x +# define EXCLR(y,x...) \ + .xdata4 "__ex_table", 99f-., y-.+4; \ + [99:] x + +/* + * Tag MCA recoverable instruction ranges. + */ + + .section "__mca_table", "a" // declare section & section attributes + .previous + +# define MCA_RECOVER_RANGE(y) \ + .xdata4 "__mca_table", y-., 99f-.; \ + [99:] + +/* + * Mark instructions that need a load of a virtual address patched to be + * a load of a physical address. We use this either in critical performance + * path (ivt.S - TLB miss processing) or in places where it might not be + * safe to use a "tpa" instruction (mca_asm.S - error recovery). + */ + .section ".data..patch.vtop", "a" // declare section & section attributes + .previous + +#define LOAD_PHYSICAL(pr, reg, obj) \ +[1:](pr)movl reg = obj; \ + .xdata4 ".data..patch.vtop", 1b-. + +/* + * For now, we always put in the McKinley E9 workaround. On CPUs that don't need it, + * we'll patch out the work-around bundles with NOPs, so their impact is minimal. + */ +#define DO_MCKINLEY_E9_WORKAROUND + +#ifdef DO_MCKINLEY_E9_WORKAROUND + .section ".data..patch.mckinley_e9", "a" + .previous +/* workaround for Itanium 2 Errata 9: */ +# define FSYS_RETURN \ + .xdata4 ".data..patch.mckinley_e9", 1f-.; \ +1:{ .mib; \ + nop.m 0; \ + mov r16=ar.pfs; \ + br.call.sptk.many b7=2f;; \ + }; \ +2:{ .mib; \ + nop.m 0; \ + mov ar.pfs=r16; \ + br.ret.sptk.many b6;; \ + } +#else +# define FSYS_RETURN br.ret.sptk.many b6 +#endif + +/* + * If physical stack register size is different from DEF_NUM_STACK_REG, + * dynamically patch the kernel for correct size. + */ + .section ".data..patch.phys_stack_reg", "a" + .previous +#define LOAD_PHYS_STACK_REG_SIZE(reg) \ +[1:] adds reg=IA64_NUM_PHYS_STACK_REG*8+8,r0; \ + .xdata4 ".data..patch.phys_stack_reg", 1b-. + +/* + * Up until early 2004, use of .align within a function caused bad unwind info. + * TEXT_ALIGN(n) expands into ".align n" if a fixed GAS is available or into nothing + * otherwise. + */ +#ifdef HAVE_WORKING_TEXT_ALIGN +# define TEXT_ALIGN(n) .align n +#else +# define TEXT_ALIGN(n) +#endif + +#ifdef HAVE_SERIALIZE_DIRECTIVE +# define dv_serialize_data .serialize.data +# define dv_serialize_instruction .serialize.instruction +#else +# define dv_serialize_data +# define dv_serialize_instruction +#endif + +#endif /* _ASM_IA64_ASMMACRO_H */ diff --git a/arch/ia64/include/asm/atomic.h b/arch/ia64/include/asm/atomic.h new file mode 100644 index 000000000000..6540a628d257 --- /dev/null +++ b/arch/ia64/include/asm/atomic.h @@ -0,0 +1,216 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_ATOMIC_H +#define _ASM_IA64_ATOMIC_H + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + * + * NOTE: don't mess with the types below! The "unsigned long" and + * "int" types were carefully placed so as to ensure proper operation + * of the macros. + * + * Copyright (C) 1998, 1999, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include + +#include +#include + + +#define ATOMIC64_INIT(i) { (i) } + +#define arch_atomic_read(v) READ_ONCE((v)->counter) +#define arch_atomic64_read(v) READ_ONCE((v)->counter) + +#define arch_atomic_set(v,i) WRITE_ONCE(((v)->counter), (i)) +#define arch_atomic64_set(v,i) WRITE_ONCE(((v)->counter), (i)) + +#define ATOMIC_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = arch_atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return new; \ +} + +#define ATOMIC_FETCH_OP(op, c_op) \ +static __inline__ int \ +ia64_atomic_fetch_##op (int i, atomic_t *v) \ +{ \ + __s32 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = arch_atomic_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic_t)) != old); \ + return old; \ +} + +#define ATOMIC_OPS(op, c_op) \ + ATOMIC_OP(op, c_op) \ + ATOMIC_FETCH_OP(op, c_op) + +ATOMIC_OPS(add, +) +ATOMIC_OPS(sub, -) + +#ifdef __OPTIMIZE__ +#define __ia64_atomic_const(i) \ + static const int __ia64_atomic_p = __builtin_constant_p(i) ? \ + ((i) == 1 || (i) == 4 || (i) == 8 || (i) == 16 || \ + (i) == -1 || (i) == -4 || (i) == -8 || (i) == -16) : 0;\ + __ia64_atomic_p +#else +#define __ia64_atomic_const(i) 0 +#endif + +#define arch_atomic_add_return(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic_add(__ia64_aar_i, v); \ +}) + +#define arch_atomic_sub_return(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic_sub(__ia64_asr_i, v); \ +}) + +#define arch_atomic_fetch_add(i,v) \ +({ \ + int __ia64_aar_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_add(__ia64_aar_i, v); \ +}) + +#define arch_atomic_fetch_sub(i,v) \ +({ \ + int __ia64_asr_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC_FETCH_OP(and, &) +ATOMIC_FETCH_OP(or, |) +ATOMIC_FETCH_OP(xor, ^) + +#define arch_atomic_and(i,v) (void)ia64_atomic_fetch_and(i,v) +#define arch_atomic_or(i,v) (void)ia64_atomic_fetch_or(i,v) +#define arch_atomic_xor(i,v) (void)ia64_atomic_fetch_xor(i,v) + +#define arch_atomic_fetch_and(i,v) ia64_atomic_fetch_and(i,v) +#define arch_atomic_fetch_or(i,v) ia64_atomic_fetch_or(i,v) +#define arch_atomic_fetch_xor(i,v) ia64_atomic_fetch_xor(i,v) + +#undef ATOMIC_OPS +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP + +#define ATOMIC64_OP(op, c_op) \ +static __inline__ s64 \ +ia64_atomic64_##op (s64 i, atomic64_t *v) \ +{ \ + s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = arch_atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return new; \ +} + +#define ATOMIC64_FETCH_OP(op, c_op) \ +static __inline__ s64 \ +ia64_atomic64_fetch_##op (s64 i, atomic64_t *v) \ +{ \ + s64 old, new; \ + CMPXCHG_BUGCHECK_DECL \ + \ + do { \ + CMPXCHG_BUGCHECK(v); \ + old = arch_atomic64_read(v); \ + new = old c_op i; \ + } while (ia64_cmpxchg(acq, v, old, new, sizeof(atomic64_t)) != old); \ + return old; \ +} + +#define ATOMIC64_OPS(op, c_op) \ + ATOMIC64_OP(op, c_op) \ + ATOMIC64_FETCH_OP(op, c_op) + +ATOMIC64_OPS(add, +) +ATOMIC64_OPS(sub, -) + +#define arch_atomic64_add_return(i,v) \ +({ \ + s64 __ia64_aar_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetch_and_add(__ia64_aar_i, &(v)->counter) \ + : ia64_atomic64_add(__ia64_aar_i, v); \ +}) + +#define arch_atomic64_sub_return(i,v) \ +({ \ + s64 __ia64_asr_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetch_and_add(-__ia64_asr_i, &(v)->counter) \ + : ia64_atomic64_sub(__ia64_asr_i, v); \ +}) + +#define arch_atomic64_fetch_add(i,v) \ +({ \ + s64 __ia64_aar_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetchadd(__ia64_aar_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_add(__ia64_aar_i, v); \ +}) + +#define arch_atomic64_fetch_sub(i,v) \ +({ \ + s64 __ia64_asr_i = (i); \ + __ia64_atomic_const(i) \ + ? ia64_fetchadd(-__ia64_asr_i, &(v)->counter, acq) \ + : ia64_atomic64_fetch_sub(__ia64_asr_i, v); \ +}) + +ATOMIC64_FETCH_OP(and, &) +ATOMIC64_FETCH_OP(or, |) +ATOMIC64_FETCH_OP(xor, ^) + +#define arch_atomic64_and(i,v) (void)ia64_atomic64_fetch_and(i,v) +#define arch_atomic64_or(i,v) (void)ia64_atomic64_fetch_or(i,v) +#define arch_atomic64_xor(i,v) (void)ia64_atomic64_fetch_xor(i,v) + +#define arch_atomic64_fetch_and(i,v) ia64_atomic64_fetch_and(i,v) +#define arch_atomic64_fetch_or(i,v) ia64_atomic64_fetch_or(i,v) +#define arch_atomic64_fetch_xor(i,v) ia64_atomic64_fetch_xor(i,v) + +#undef ATOMIC64_OPS +#undef ATOMIC64_FETCH_OP +#undef ATOMIC64_OP + +#define arch_atomic_add(i,v) (void)arch_atomic_add_return((i), (v)) +#define arch_atomic_sub(i,v) (void)arch_atomic_sub_return((i), (v)) + +#define arch_atomic64_add(i,v) (void)arch_atomic64_add_return((i), (v)) +#define arch_atomic64_sub(i,v) (void)arch_atomic64_sub_return((i), (v)) + +#endif /* _ASM_IA64_ATOMIC_H */ diff --git a/arch/ia64/include/asm/barrier.h b/arch/ia64/include/asm/barrier.h new file mode 100644 index 000000000000..751cdd353446 --- /dev/null +++ b/arch/ia64/include/asm/barrier.h @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Memory barrier definitions. This is based on information published + * in the Processor Abstraction Layer and the System Abstraction Layer + * manual. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ +#ifndef _ASM_IA64_BARRIER_H +#define _ASM_IA64_BARRIER_H + +#include + +/* + * Macros to force memory ordering. In these descriptions, "previous" + * and "subsequent" refer to program order; "visible" means that all + * architecturally visible effects of a memory access have occurred + * (at a minimum, this means the memory has been read or written). + * + * wmb(): Guarantees that all preceding stores to memory- + * like regions are visible before any subsequent + * stores and that all following stores will be + * visible only after all previous stores. + * rmb(): Like wmb(), but for reads. + * mb(): wmb()/rmb() combo, i.e., all previous memory + * accesses are visible before all subsequent + * accesses and vice versa. This is also known as + * a "fence." + * + * Note: "mb()" and its variants cannot be used as a fence to order + * accesses to memory mapped I/O registers. For that, mf.a needs to + * be used. However, we don't want to always use mf.a because (a) + * it's (presumably) much slower than mf and (b) mf.a is supported for + * sequential memory pages only. + */ +#define mb() ia64_mf() +#define rmb() mb() +#define wmb() mb() + +#define dma_rmb() mb() +#define dma_wmb() mb() + +# define __smp_mb() mb() + +#define __smp_mb__before_atomic() barrier() +#define __smp_mb__after_atomic() barrier() + +/* + * IA64 GCC turns volatile stores into st.rel and volatile loads into ld.acq no + * need for asm trickery! + */ + +#define __smp_store_release(p, v) \ +do { \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define __smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + compiletime_assert_atomic_type(*p); \ + barrier(); \ + ___p1; \ +}) + +/* + * The group barrier in front of the rsm & ssm are necessary to ensure + * that none of the previous instructions in the same group are + * affected by the rsm/ssm. + */ + +#include + +#endif /* _ASM_IA64_BARRIER_H */ diff --git a/arch/ia64/include/asm/bitops.h b/arch/ia64/include/asm/bitops.h new file mode 100644 index 000000000000..39841b8e6efc --- /dev/null +++ b/arch/ia64/include/asm/bitops.h @@ -0,0 +1,406 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_BITOPS_H +#define _ASM_IA64_BITOPS_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 02/06/02 find_next_bit() and find_first_bit() added from Erich Focht's ia64 + * O(1) scheduler patch + */ + +#ifndef _LINUX_BITOPS_H +#error only can be included directly +#endif + +#include +#include +#include +#include + +/** + * set_bit - Atomically set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * This function is atomic and may not be reordered. See __set_bit() + * if you do not require the atomic guarantees. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + * + * The address must be (at least) "long" aligned. + * Note that there are driver (e.g., eepro100) which use these operations to + * operate on hw-defined data-structures, so we can't easily change these + * operations to force a bigger alignment. + * + * bit 0 is the LSB of addr; bit 32 is the LSB of (addr+1). + */ +static __inline__ void +set_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = 1 << (nr & 31); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old | bit; + } while (cmpxchg_acq(m, old, new) != old); +} + +/** + * arch___set_bit - Set a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * + * Unlike set_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __always_inline void +arch___set_bit(unsigned long nr, volatile unsigned long *addr) +{ + *((__u32 *) addr + (nr >> 5)) |= (1 << (nr & 31)); +} + +/** + * clear_bit - Clears a bit in memory + * @nr: Bit to clear + * @addr: Address to start counting from + * + * clear_bit() is atomic and may not be reordered. However, it does + * not contain a memory barrier, so if it is used for locking purposes, + * you should call smp_mb__before_atomic() and/or smp_mb__after_atomic() + * in order to ensure changes are visible on other processors. + */ +static __inline__ void +clear_bit (int nr, volatile void *addr) +{ + __u32 mask, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + mask = ~(1 << (nr & 31)); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old & mask; + } while (cmpxchg_acq(m, old, new) != old); +} + +/** + * arch___clear_bit - Clears a bit in memory (non-atomic version) + * @nr: the bit to clear + * @addr: the address to start counting from + * + * Unlike clear_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __always_inline void +arch___clear_bit(unsigned long nr, volatile unsigned long *addr) +{ + *((__u32 *) addr + (nr >> 5)) &= ~(1 << (nr & 31)); +} + +/** + * change_bit - Toggle a bit in memory + * @nr: Bit to toggle + * @addr: Address to start counting from + * + * change_bit() is atomic and may not be reordered. + * Note that @nr may be almost arbitrarily large; this function is not + * restricted to acting on a single-word quantity. + */ +static __inline__ void +change_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = (1 << (nr & 31)); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old ^ bit; + } while (cmpxchg_acq(m, old, new) != old); +} + +/** + * arch___change_bit - Toggle a bit in memory + * @nr: the bit to toggle + * @addr: the address to start counting from + * + * Unlike change_bit(), this function is non-atomic and may be reordered. + * If it's called on the same region of memory simultaneously, the effect + * may be that only one operation succeeds. + */ +static __always_inline void +arch___change_bit(unsigned long nr, volatile unsigned long *addr) +{ + *((__u32 *) addr + (nr >> 5)) ^= (1 << (nr & 31)); +} + +/** + * test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies the acquisition side of the memory barrier. + */ +static __inline__ int +test_and_set_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = 1 << (nr & 31); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old | bit; + } while (cmpxchg_acq(m, old, new) != old); + return (old & bit) != 0; +} + + +/** + * arch___test_and_set_bit - Set a bit and return its old value + * @nr: Bit to set + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __always_inline bool +arch___test_and_set_bit(unsigned long nr, volatile unsigned long *addr) +{ + __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1 << (nr & 31); + int oldbitset = (*p & m) != 0; + + *p |= m; + return oldbitset; +} + +/** + * test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies the acquisition side of the memory barrier. + */ +static __inline__ int +test_and_clear_bit (int nr, volatile void *addr) +{ + __u32 mask, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + mask = ~(1 << (nr & 31)); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old & mask; + } while (cmpxchg_acq(m, old, new) != old); + return (old & ~mask) != 0; +} + +/** + * arch___test_and_clear_bit - Clear a bit and return its old value + * @nr: Bit to clear + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + * If two examples of this operation race, one can appear to succeed + * but actually fail. You must protect multiple accesses with a lock. + */ +static __always_inline bool +arch___test_and_clear_bit(unsigned long nr, volatile unsigned long *addr) +{ + __u32 *p = (__u32 *) addr + (nr >> 5); + __u32 m = 1 << (nr & 31); + int oldbitset = (*p & m) != 0; + + *p &= ~m; + return oldbitset; +} + +/** + * test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is atomic and cannot be reordered. + * It also implies the acquisition side of the memory barrier. + */ +static __inline__ int +test_and_change_bit (int nr, volatile void *addr) +{ + __u32 bit, old, new; + volatile __u32 *m; + CMPXCHG_BUGCHECK_DECL + + m = (volatile __u32 *) addr + (nr >> 5); + bit = (1 << (nr & 31)); + do { + CMPXCHG_BUGCHECK(m); + old = *m; + new = old ^ bit; + } while (cmpxchg_acq(m, old, new) != old); + return (old & bit) != 0; +} + +/** + * arch___test_and_change_bit - Change a bit and return its old value + * @nr: Bit to change + * @addr: Address to count from + * + * This operation is non-atomic and can be reordered. + */ +static __always_inline bool +arch___test_and_change_bit(unsigned long nr, volatile unsigned long *addr) +{ + __u32 old, bit = (1 << (nr & 31)); + __u32 *m = (__u32 *) addr + (nr >> 5); + + old = *m; + *m = old ^ bit; + return (old & bit) != 0; +} + +#define arch_test_bit generic_test_bit +#define arch_test_bit_acquire generic_test_bit_acquire + +/** + * ffz - find the first zero bit in a long word + * @x: The long word to find the bit in + * + * Returns the bit-number (0..63) of the first (least significant) zero bit. + * Undefined if no zero exists, so code should check against ~0UL first... + */ +static inline unsigned long +ffz (unsigned long x) +{ + unsigned long result; + + result = ia64_popcnt(x & (~x - 1)); + return result; +} + +/** + * __ffs - find first bit in word. + * @x: The word to search + * + * Undefined if no bit exists, so code should check against 0 first. + */ +static __inline__ unsigned long +__ffs (unsigned long x) +{ + unsigned long result; + + result = ia64_popcnt((x-1) & ~x); + return result; +} + +#ifdef __KERNEL__ + +/* + * Return bit number of last (most-significant) bit set. Undefined + * for x==0. Bits are numbered from 0..63 (e.g., ia64_fls(9) == 3). + */ +static inline unsigned long +ia64_fls (unsigned long x) +{ + long double d = x; + long exp; + + exp = ia64_getf_exp(d); + return exp - 0xffff; +} + +/* + * Find the last (most significant) bit set. Returns 0 for x==0 and + * bits are numbered from 1..32 (e.g., fls(9) == 4). + */ +static inline int fls(unsigned int t) +{ + unsigned long x = t & 0xffffffffu; + + if (!x) + return 0; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return ia64_popcnt(x); +} + +/* + * Find the last (most significant) bit set. Undefined for x==0. + * Bits are numbered from 0..63 (e.g., __fls(9) == 3). + */ +static inline unsigned long +__fls (unsigned long x) +{ + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + x |= x >> 32; + return ia64_popcnt(x) - 1; +} + +#include + +#include + +/* + * hweightN: returns the hamming weight (i.e. the number + * of bits set) of a N-bit word + */ +static __inline__ unsigned long __arch_hweight64(unsigned long x) +{ + unsigned long result; + result = ia64_popcnt(x); + return result; +} + +#define __arch_hweight32(x) ((unsigned int) __arch_hweight64((x) & 0xfffffffful)) +#define __arch_hweight16(x) ((unsigned int) __arch_hweight64((x) & 0xfffful)) +#define __arch_hweight8(x) ((unsigned int) __arch_hweight64((x) & 0xfful)) + +#include + +#endif /* __KERNEL__ */ + +#ifdef __KERNEL__ + +#include + +#include + +#include + +#include + +#include + +#endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_BITOPS_H */ diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h new file mode 100644 index 000000000000..66b37a532765 --- /dev/null +++ b/arch/ia64/include/asm/bug.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_BUG_H +#define _ASM_IA64_BUG_H + +#ifdef CONFIG_BUG +#define ia64_abort() __builtin_trap() +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ + ia64_abort(); \ +} while (0) + +/* should this BUG be made generic? */ +#define HAVE_ARCH_BUG +#endif + +#include + +#endif diff --git a/arch/ia64/include/asm/cache.h b/arch/ia64/include/asm/cache.h new file mode 100644 index 000000000000..2f1c70647068 --- /dev/null +++ b/arch/ia64/include/asm/cache.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_CACHE_H +#define _ASM_IA64_CACHE_H + + +/* + * Copyright (C) 1998-2000 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* Bytes per L1 (data) cache line. */ +#define L1_CACHE_SHIFT CONFIG_IA64_L1_CACHE_SHIFT +#define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) + +#ifdef CONFIG_SMP +# define SMP_CACHE_SHIFT L1_CACHE_SHIFT +# define SMP_CACHE_BYTES L1_CACHE_BYTES +#else + /* + * The "aligned" directive can only _increase_ alignment, so this is + * safe and provides an easy way to avoid wasting space on a + * uni-processor: + */ +# define SMP_CACHE_SHIFT 3 +# define SMP_CACHE_BYTES (1 << 3) +#endif + +#define __read_mostly __section(".data..read_mostly") + +#endif /* _ASM_IA64_CACHE_H */ diff --git a/arch/ia64/include/asm/cacheflush.h b/arch/ia64/include/asm/cacheflush.h new file mode 100644 index 000000000000..eac493fa9e0d --- /dev/null +++ b/arch/ia64/include/asm/cacheflush.h @@ -0,0 +1,39 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_CACHEFLUSH_H +#define _ASM_IA64_CACHEFLUSH_H + +/* + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + +#include + +#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1 +static inline void flush_dcache_folio(struct folio *folio) +{ + clear_bit(PG_arch_1, &folio->flags); +} +#define flush_dcache_folio flush_dcache_folio + +static inline void flush_dcache_page(struct page *page) +{ + flush_dcache_folio(page_folio(page)); +} + +extern void flush_icache_range(unsigned long start, unsigned long end); +#define flush_icache_range flush_icache_range +extern void clflush_cache_range(void *addr, int size); + +#define flush_icache_user_page(vma, page, user_addr, len) \ +do { \ + unsigned long _addr = (unsigned long) page_address(page) + ((user_addr) & ~PAGE_MASK); \ + flush_icache_range(_addr, _addr + (len)); \ +} while (0) + +#include + +#endif /* _ASM_IA64_CACHEFLUSH_H */ diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h new file mode 100644 index 000000000000..f3026213aa32 --- /dev/null +++ b/arch/ia64/include/asm/checksum.h @@ -0,0 +1,63 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_CHECKSUM_H +#define _ASM_IA64_CHECKSUM_H + +/* + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + */ +extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * Computes the checksum of the TCP/UDP pseudo-header returns a 16-bit + * checksum, already complemented + */ +extern __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +extern __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +/* + * Computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +extern __wsum csum_partial(const void *buff, int len, __wsum sum); + +/* + * This routine is used for miscellaneous IP-like checksums, mainly in + * icmp.c + */ +extern __sum16 ip_compute_csum(const void *buff, int len); + +/* + * Fold a partial checksum without adding pseudo headers. + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +#define _HAVE_ARCH_IPV6_CSUM 1 +struct in6_addr; +extern __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum); + +#endif /* _ASM_IA64_CHECKSUM_H */ diff --git a/arch/ia64/include/asm/clocksource.h b/arch/ia64/include/asm/clocksource.h new file mode 100644 index 000000000000..71a517751afa --- /dev/null +++ b/arch/ia64/include/asm/clocksource.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* IA64-specific clocksource additions */ + +#ifndef _ASM_IA64_CLOCKSOURCE_H +#define _ASM_IA64_CLOCKSOURCE_H + +struct arch_clocksource_data { + void *fsys_mmio; /* used by fsyscall asm code */ +}; + +#endif /* _ASM_IA64_CLOCKSOURCE_H */ diff --git a/arch/ia64/include/asm/cmpxchg.h b/arch/ia64/include/asm/cmpxchg.h new file mode 100644 index 000000000000..d85ee1a0a227 --- /dev/null +++ b/arch/ia64/include/asm/cmpxchg.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_CMPXCHG_H +#define _ASM_IA64_CMPXCHG_H + +#include + +#define arch_xchg(ptr, x) \ +({(__typeof__(*(ptr))) __arch_xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));}) + +#define arch_cmpxchg(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) +#define arch_cmpxchg64(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) + +#define arch_cmpxchg_local arch_cmpxchg +#define arch_cmpxchg64_local arch_cmpxchg64 + +#ifdef CONFIG_IA64_DEBUG_CMPXCHG +# define CMPXCHG_BUGCHECK_DECL int _cmpxchg_bugcheck_count = 128; +# define CMPXCHG_BUGCHECK(v) \ +do { \ + if (_cmpxchg_bugcheck_count-- <= 0) { \ + void *ip; \ + extern int _printk(const char *fmt, ...); \ + ip = (void *) ia64_getreg(_IA64_REG_IP); \ + _printk("CMPXCHG_BUGCHECK: stuck at %p on word %p\n", ip, (v));\ + break; \ + } \ +} while (0) +#else /* !CONFIG_IA64_DEBUG_CMPXCHG */ +# define CMPXCHG_BUGCHECK_DECL +# define CMPXCHG_BUGCHECK(v) +#endif /* !CONFIG_IA64_DEBUG_CMPXCHG */ + +#endif /* _ASM_IA64_CMPXCHG_H */ diff --git a/arch/ia64/include/asm/cpu.h b/arch/ia64/include/asm/cpu.h new file mode 100644 index 000000000000..db125df9e088 --- /dev/null +++ b/arch/ia64/include/asm/cpu.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_CPU_H_ +#define _ASM_IA64_CPU_H_ + +#include +#include +#include +#include + +struct ia64_cpu { + struct cpu cpu; +}; + +DECLARE_PER_CPU(struct ia64_cpu, cpu_devices); + +DECLARE_PER_CPU(int, cpu_state); + +#ifdef CONFIG_HOTPLUG_CPU +extern int arch_register_cpu(int num); +extern void arch_unregister_cpu(int); +#endif + +#endif /* _ASM_IA64_CPU_H_ */ diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h new file mode 100644 index 000000000000..7f28c3564d5d --- /dev/null +++ b/arch/ia64/include/asm/cputime.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Definitions for measuring cputime on ia64 machines. + * + * Based on . + * + * Copyright (C) 2007 FUJITSU LIMITED + * Copyright (C) 2007 Hidetoshi Seto + * + * If we have CONFIG_VIRT_CPU_ACCOUNTING_NATIVE, we measure cpu time in nsec. + * Otherwise we measure cpu time in jiffies using the generic definitions. + */ + +#ifndef __IA64_CPUTIME_H +#define __IA64_CPUTIME_H + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +extern void arch_vtime_task_switch(struct task_struct *tsk); +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#endif /* __IA64_CPUTIME_H */ diff --git a/arch/ia64/include/asm/current.h b/arch/ia64/include/asm/current.h new file mode 100644 index 000000000000..86fbcc88dff2 --- /dev/null +++ b/arch/ia64/include/asm/current.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_CURRENT_H +#define _ASM_IA64_CURRENT_H + +/* + * Modified 1998-2000 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +#include + +/* + * In kernel mode, thread pointer (r13) is used to point to the current task + * structure. + */ +#define current ((struct task_struct *) ia64_getreg(_IA64_REG_TP)) + +#endif /* _ASM_IA64_CURRENT_H */ diff --git a/arch/ia64/include/asm/cyclone.h b/arch/ia64/include/asm/cyclone.h new file mode 100644 index 000000000000..a481393647e9 --- /dev/null +++ b/arch/ia64/include/asm/cyclone.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef ASM_IA64_CYCLONE_H +#define ASM_IA64_CYCLONE_H + +#ifdef CONFIG_IA64_CYCLONE +extern int use_cyclone; +extern void __init cyclone_setup(void); +#else /* CONFIG_IA64_CYCLONE */ +#define use_cyclone 0 +static inline void cyclone_setup(void) +{ + printk(KERN_ERR "Cyclone Counter: System not configured" + " w/ CONFIG_IA64_CYCLONE.\n"); +} +#endif /* CONFIG_IA64_CYCLONE */ +#endif /* !ASM_IA64_CYCLONE_H */ diff --git a/arch/ia64/include/asm/delay.h b/arch/ia64/include/asm/delay.h new file mode 100644 index 000000000000..0227ac586107 --- /dev/null +++ b/arch/ia64/include/asm/delay.h @@ -0,0 +1,89 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_DELAY_H +#define _ASM_IA64_DELAY_H + +/* + * Delay routines using a pre-computed "cycles/usec" value. + * + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ + +#include +#include +#include + +#include +#include + +static __inline__ void +ia64_set_itm (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_ITM, val); + ia64_srlz_d(); +} + +static __inline__ unsigned long +ia64_get_itm (void) +{ + unsigned long result; + + result = ia64_getreg(_IA64_REG_CR_ITM); + ia64_srlz_d(); + return result; +} + +static __inline__ void +ia64_set_itv (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_ITV, val); + ia64_srlz_d(); +} + +static __inline__ unsigned long +ia64_get_itv (void) +{ + return ia64_getreg(_IA64_REG_CR_ITV); +} + +static __inline__ void +ia64_set_itc (unsigned long val) +{ + ia64_setreg(_IA64_REG_AR_ITC, val); + ia64_srlz_d(); +} + +static __inline__ unsigned long +ia64_get_itc (void) +{ + unsigned long result; + + result = ia64_getreg(_IA64_REG_AR_ITC); + ia64_barrier(); +#ifdef CONFIG_ITANIUM + while (unlikely((__s32) result == -1)) { + result = ia64_getreg(_IA64_REG_AR_ITC); + ia64_barrier(); + } +#endif + return result; +} + +extern void ia64_delay_loop (unsigned long loops); + +static __inline__ void +__delay (unsigned long loops) +{ + if (unlikely(loops < 1)) + return; + + ia64_delay_loop (loops - 1); +} + +extern void udelay (unsigned long usecs); + +#endif /* _ASM_IA64_DELAY_H */ diff --git a/arch/ia64/include/asm/device.h b/arch/ia64/include/asm/device.h new file mode 100644 index 000000000000..918b198cd5bb --- /dev/null +++ b/arch/ia64/include/asm/device.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Arch specific extensions to struct device + */ +#ifndef _ASM_IA64_DEVICE_H +#define _ASM_IA64_DEVICE_H + +struct dev_archdata { +}; + +struct pdev_archdata { +}; + +#endif /* _ASM_IA64_DEVICE_H */ diff --git a/arch/ia64/include/asm/div64.h b/arch/ia64/include/asm/div64.h new file mode 100644 index 000000000000..6cd978cefb28 --- /dev/null +++ b/arch/ia64/include/asm/div64.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h new file mode 100644 index 000000000000..af6fa8e1597c --- /dev/null +++ b/arch/ia64/include/asm/dma-mapping.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_DMA_MAPPING_H +#define _ASM_IA64_DMA_MAPPING_H + +/* + * Copyright (C) 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ +extern const struct dma_map_ops *dma_ops; + +static inline const struct dma_map_ops *get_arch_dma_ops(void) +{ + return dma_ops; +} + +#endif /* _ASM_IA64_DMA_MAPPING_H */ diff --git a/arch/ia64/include/asm/dma.h b/arch/ia64/include/asm/dma.h new file mode 100644 index 000000000000..eaed2626ffda --- /dev/null +++ b/arch/ia64/include/asm/dma.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_DMA_H +#define _ASM_IA64_DMA_H + +/* + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include /* need byte IO */ + +extern unsigned long MAX_DMA_ADDRESS; + +#define free_dma(x) + +#endif /* _ASM_IA64_DMA_H */ diff --git a/arch/ia64/include/asm/dmi.h b/arch/ia64/include/asm/dmi.h new file mode 100644 index 000000000000..ecd9e0a0f5f9 --- /dev/null +++ b/arch/ia64/include/asm/dmi.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_DMI_H +#define _ASM_DMI_H 1 + +#include +#include + +/* Use normal IO mappings for DMI */ +#define dmi_early_remap ioremap +#define dmi_early_unmap(x, l) iounmap(x) +#define dmi_remap ioremap +#define dmi_unmap iounmap +#define dmi_alloc(l) kzalloc(l, GFP_ATOMIC) + +#endif diff --git a/arch/ia64/include/asm/early_ioremap.h b/arch/ia64/include/asm/early_ioremap.h new file mode 100644 index 000000000000..934191b1e2e3 --- /dev/null +++ b/arch/ia64/include/asm/early_ioremap.h @@ -0,0 +1,11 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_EARLY_IOREMAP_H +#define _ASM_IA64_EARLY_IOREMAP_H + +extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size); +#define early_memremap(phys_addr, size) early_ioremap(phys_addr, size) + +extern void early_iounmap (volatile void __iomem *addr, unsigned long size); +#define early_memunmap(addr, size) early_iounmap(addr, size) + +#endif diff --git a/arch/ia64/include/asm/efi.h b/arch/ia64/include/asm/efi.h new file mode 100644 index 000000000000..6a4a50d8f19a --- /dev/null +++ b/arch/ia64/include/asm/efi.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_EFI_H +#define _ASM_EFI_H + +typedef int (*efi_freemem_callback_t) (u64 start, u64 end, void *arg); + +void *efi_get_pal_addr(void); +void efi_map_pal_code(void); +void efi_memmap_walk(efi_freemem_callback_t, void *); +void efi_memmap_walk_uc(efi_freemem_callback_t, void *); +void efi_gettimeofday(struct timespec64 *ts); + +#endif diff --git a/arch/ia64/include/asm/elf.h b/arch/ia64/include/asm/elf.h new file mode 100644 index 000000000000..2ef5f9966ad1 --- /dev/null +++ b/arch/ia64/include/asm/elf.h @@ -0,0 +1,233 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_ELF_H +#define _ASM_IA64_ELF_H + +/* + * ELF-specific definitions. + * + * Copyright (C) 1998-1999, 2002-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include +#include +#include + +/* + * This is used to ensure we don't load something for the wrong architecture. + */ +#define elf_check_arch(x) ((x)->e_machine == EM_IA_64) + +/* + * These are used to set parameters in the core dumps. + */ +#define ELF_CLASS ELFCLASS64 +#define ELF_DATA ELFDATA2LSB +#define ELF_ARCH EM_IA_64 + +#define CORE_DUMP_USE_REGSET + +/* Least-significant four bits of ELF header's e_flags are OS-specific. The bits are + interpreted as follows by Linux: */ +#define EF_IA_64_LINUX_EXECUTABLE_STACK 0x1 /* is stack (& heap) executable by default? */ + +#define ELF_EXEC_PAGESIZE PAGE_SIZE + +/* + * This is the location that an ET_DYN program is loaded if exec'ed. + * Typical use of this is to invoke "./ld.so someprog" to test out a + * new version of the loader. We need to make sure that it is out of + * the way of the program that it will "exec", and that there is + * sufficient room for the brk. + */ +#define ELF_ET_DYN_BASE (TASK_UNMAPPED_BASE + 0x800000000UL) + +#define PT_IA_64_UNWIND 0x70000001 + +/* IA-64 relocations: */ +#define R_IA64_NONE 0x00 /* none */ +#define R_IA64_IMM14 0x21 /* symbol + addend, add imm14 */ +#define R_IA64_IMM22 0x22 /* symbol + addend, add imm22 */ +#define R_IA64_IMM64 0x23 /* symbol + addend, mov imm64 */ +#define R_IA64_DIR32MSB 0x24 /* symbol + addend, data4 MSB */ +#define R_IA64_DIR32LSB 0x25 /* symbol + addend, data4 LSB */ +#define R_IA64_DIR64MSB 0x26 /* symbol + addend, data8 MSB */ +#define R_IA64_DIR64LSB 0x27 /* symbol + addend, data8 LSB */ +#define R_IA64_GPREL22 0x2a /* @gprel(sym+add), add imm22 */ +#define R_IA64_GPREL64I 0x2b /* @gprel(sym+add), mov imm64 */ +#define R_IA64_GPREL32MSB 0x2c /* @gprel(sym+add), data4 MSB */ +#define R_IA64_GPREL32LSB 0x2d /* @gprel(sym+add), data4 LSB */ +#define R_IA64_GPREL64MSB 0x2e /* @gprel(sym+add), data8 MSB */ +#define R_IA64_GPREL64LSB 0x2f /* @gprel(sym+add), data8 LSB */ +#define R_IA64_LTOFF22 0x32 /* @ltoff(sym+add), add imm22 */ +#define R_IA64_LTOFF64I 0x33 /* @ltoff(sym+add), mov imm64 */ +#define R_IA64_PLTOFF22 0x3a /* @pltoff(sym+add), add imm22 */ +#define R_IA64_PLTOFF64I 0x3b /* @pltoff(sym+add), mov imm64 */ +#define R_IA64_PLTOFF64MSB 0x3e /* @pltoff(sym+add), data8 MSB */ +#define R_IA64_PLTOFF64LSB 0x3f /* @pltoff(sym+add), data8 LSB */ +#define R_IA64_FPTR64I 0x43 /* @fptr(sym+add), mov imm64 */ +#define R_IA64_FPTR32MSB 0x44 /* @fptr(sym+add), data4 MSB */ +#define R_IA64_FPTR32LSB 0x45 /* @fptr(sym+add), data4 LSB */ +#define R_IA64_FPTR64MSB 0x46 /* @fptr(sym+add), data8 MSB */ +#define R_IA64_FPTR64LSB 0x47 /* @fptr(sym+add), data8 LSB */ +#define R_IA64_PCREL60B 0x48 /* @pcrel(sym+add), brl */ +#define R_IA64_PCREL21B 0x49 /* @pcrel(sym+add), ptb, call */ +#define R_IA64_PCREL21M 0x4a /* @pcrel(sym+add), chk.s */ +#define R_IA64_PCREL21F 0x4b /* @pcrel(sym+add), fchkf */ +#define R_IA64_PCREL32MSB 0x4c /* @pcrel(sym+add), data4 MSB */ +#define R_IA64_PCREL32LSB 0x4d /* @pcrel(sym+add), data4 LSB */ +#define R_IA64_PCREL64MSB 0x4e /* @pcrel(sym+add), data8 MSB */ +#define R_IA64_PCREL64LSB 0x4f /* @pcrel(sym+add), data8 LSB */ +#define R_IA64_LTOFF_FPTR22 0x52 /* @ltoff(@fptr(s+a)), imm22 */ +#define R_IA64_LTOFF_FPTR64I 0x53 /* @ltoff(@fptr(s+a)), imm64 */ +#define R_IA64_LTOFF_FPTR32MSB 0x54 /* @ltoff(@fptr(s+a)), 4 MSB */ +#define R_IA64_LTOFF_FPTR32LSB 0x55 /* @ltoff(@fptr(s+a)), 4 LSB */ +#define R_IA64_LTOFF_FPTR64MSB 0x56 /* @ltoff(@fptr(s+a)), 8 MSB */ +#define R_IA64_LTOFF_FPTR64LSB 0x57 /* @ltoff(@fptr(s+a)), 8 LSB */ +#define R_IA64_SEGREL32MSB 0x5c /* @segrel(sym+add), data4 MSB */ +#define R_IA64_SEGREL32LSB 0x5d /* @segrel(sym+add), data4 LSB */ +#define R_IA64_SEGREL64MSB 0x5e /* @segrel(sym+add), data8 MSB */ +#define R_IA64_SEGREL64LSB 0x5f /* @segrel(sym+add), data8 LSB */ +#define R_IA64_SECREL32MSB 0x64 /* @secrel(sym+add), data4 MSB */ +#define R_IA64_SECREL32LSB 0x65 /* @secrel(sym+add), data4 LSB */ +#define R_IA64_SECREL64MSB 0x66 /* @secrel(sym+add), data8 MSB */ +#define R_IA64_SECREL64LSB 0x67 /* @secrel(sym+add), data8 LSB */ +#define R_IA64_REL32MSB 0x6c /* data 4 + REL */ +#define R_IA64_REL32LSB 0x6d /* data 4 + REL */ +#define R_IA64_REL64MSB 0x6e /* data 8 + REL */ +#define R_IA64_REL64LSB 0x6f /* data 8 + REL */ +#define R_IA64_LTV32MSB 0x74 /* symbol + addend, data4 MSB */ +#define R_IA64_LTV32LSB 0x75 /* symbol + addend, data4 LSB */ +#define R_IA64_LTV64MSB 0x76 /* symbol + addend, data8 MSB */ +#define R_IA64_LTV64LSB 0x77 /* symbol + addend, data8 LSB */ +#define R_IA64_PCREL21BI 0x79 /* @pcrel(sym+add), ptb, call */ +#define R_IA64_PCREL22 0x7a /* @pcrel(sym+add), imm22 */ +#define R_IA64_PCREL64I 0x7b /* @pcrel(sym+add), imm64 */ +#define R_IA64_IPLTMSB 0x80 /* dynamic reloc, imported PLT, MSB */ +#define R_IA64_IPLTLSB 0x81 /* dynamic reloc, imported PLT, LSB */ +#define R_IA64_COPY 0x84 /* dynamic reloc, data copy */ +#define R_IA64_SUB 0x85 /* -symbol + addend, add imm22 */ +#define R_IA64_LTOFF22X 0x86 /* LTOFF22, relaxable. */ +#define R_IA64_LDXMOV 0x87 /* Use of LTOFF22X. */ +#define R_IA64_TPREL14 0x91 /* @tprel(sym+add), add imm14 */ +#define R_IA64_TPREL22 0x92 /* @tprel(sym+add), add imm22 */ +#define R_IA64_TPREL64I 0x93 /* @tprel(sym+add), add imm64 */ +#define R_IA64_TPREL64MSB 0x96 /* @tprel(sym+add), data8 MSB */ +#define R_IA64_TPREL64LSB 0x97 /* @tprel(sym+add), data8 LSB */ +#define R_IA64_LTOFF_TPREL22 0x9a /* @ltoff(@tprel(s+a)), add imm22 */ +#define R_IA64_DTPMOD64MSB 0xa6 /* @dtpmod(sym+add), data8 MSB */ +#define R_IA64_DTPMOD64LSB 0xa7 /* @dtpmod(sym+add), data8 LSB */ +#define R_IA64_LTOFF_DTPMOD22 0xaa /* @ltoff(@dtpmod(s+a)), imm22 */ +#define R_IA64_DTPREL14 0xb1 /* @dtprel(sym+add), imm14 */ +#define R_IA64_DTPREL22 0xb2 /* @dtprel(sym+add), imm22 */ +#define R_IA64_DTPREL64I 0xb3 /* @dtprel(sym+add), imm64 */ +#define R_IA64_DTPREL32MSB 0xb4 /* @dtprel(sym+add), data4 MSB */ +#define R_IA64_DTPREL32LSB 0xb5 /* @dtprel(sym+add), data4 LSB */ +#define R_IA64_DTPREL64MSB 0xb6 /* @dtprel(sym+add), data8 MSB */ +#define R_IA64_DTPREL64LSB 0xb7 /* @dtprel(sym+add), data8 LSB */ +#define R_IA64_LTOFF_DTPREL22 0xba /* @ltoff(@dtprel(s+a)), imm22 */ + +/* IA-64 specific section flags: */ +#define SHF_IA_64_SHORT 0x10000000 /* section near gp */ + +/* + * We use (abuse?) this macro to insert the (empty) vm_area that is + * used to map the register backing store. I don't see any better + * place to do this, but we should discuss this with Linus once we can + * talk to him... + */ +extern void ia64_init_addr_space (void); +#define ELF_PLAT_INIT(_r, load_addr) ia64_init_addr_space() + +/* ELF register definitions. This is needed for core dump support. */ + +/* + * elf_gregset_t contains the application-level state in the following order: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm psr + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec ar.csd ar.ssd + */ +#define ELF_NGREG 128 /* we really need just 72 but let's leave some headroom... */ +#define ELF_NFPREG 128 /* f0 and f1 could be omitted, but so what... */ + +/* elf_gregset_t register offsets */ +#define ELF_GR_0_OFFSET 0 +#define ELF_NAT_OFFSET (32 * sizeof(elf_greg_t)) +#define ELF_PR_OFFSET (33 * sizeof(elf_greg_t)) +#define ELF_BR_0_OFFSET (34 * sizeof(elf_greg_t)) +#define ELF_CR_IIP_OFFSET (42 * sizeof(elf_greg_t)) +#define ELF_CFM_OFFSET (43 * sizeof(elf_greg_t)) +#define ELF_CR_IPSR_OFFSET (44 * sizeof(elf_greg_t)) +#define ELF_GR_OFFSET(i) (ELF_GR_0_OFFSET + i * sizeof(elf_greg_t)) +#define ELF_BR_OFFSET(i) (ELF_BR_0_OFFSET + i * sizeof(elf_greg_t)) +#define ELF_AR_RSC_OFFSET (45 * sizeof(elf_greg_t)) +#define ELF_AR_BSP_OFFSET (46 * sizeof(elf_greg_t)) +#define ELF_AR_BSPSTORE_OFFSET (47 * sizeof(elf_greg_t)) +#define ELF_AR_RNAT_OFFSET (48 * sizeof(elf_greg_t)) +#define ELF_AR_CCV_OFFSET (49 * sizeof(elf_greg_t)) +#define ELF_AR_UNAT_OFFSET (50 * sizeof(elf_greg_t)) +#define ELF_AR_FPSR_OFFSET (51 * sizeof(elf_greg_t)) +#define ELF_AR_PFS_OFFSET (52 * sizeof(elf_greg_t)) +#define ELF_AR_LC_OFFSET (53 * sizeof(elf_greg_t)) +#define ELF_AR_EC_OFFSET (54 * sizeof(elf_greg_t)) +#define ELF_AR_CSD_OFFSET (55 * sizeof(elf_greg_t)) +#define ELF_AR_SSD_OFFSET (56 * sizeof(elf_greg_t)) +#define ELF_AR_END_OFFSET (57 * sizeof(elf_greg_t)) + +typedef unsigned long elf_greg_t; +typedef elf_greg_t elf_gregset_t[ELF_NGREG]; + +typedef struct ia64_fpreg elf_fpreg_t; +typedef elf_fpreg_t elf_fpregset_t[ELF_NFPREG]; + + + +struct pt_regs; /* forward declaration... */ +extern void ia64_elf_core_copy_regs (struct pt_regs *src, elf_gregset_t dst); +#define ELF_CORE_COPY_REGS(_dest,_regs) ia64_elf_core_copy_regs(_regs, _dest); + +/* This macro yields a bitmask that programs can use to figure out + what instruction set this CPU supports. */ +#define ELF_HWCAP 0 + +/* This macro yields a string that ld.so will use to load + implementation specific libraries for optimization. Not terribly + relevant until we have real hardware to play with... */ +#define ELF_PLATFORM NULL + +#define elf_read_implies_exec(ex, executable_stack) \ + ((executable_stack!=EXSTACK_DISABLE_X) && ((ex).e_flags & EF_IA_64_LINUX_EXECUTABLE_STACK) != 0) + +struct task_struct; + +#define GATE_EHDR ((const struct elfhdr *) GATE_ADDR) + +/* update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT entries changes */ +#define ARCH_DLINFO \ +do { \ + extern char __kernel_syscall_via_epc[]; \ + NEW_AUX_ENT(AT_SYSINFO, (unsigned long) __kernel_syscall_via_epc); \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, (unsigned long) GATE_EHDR); \ +} while (0) + +/* + * format for entries in the Global Offset Table + */ +struct got_entry { + uint64_t val; +}; + +/* + * Layout of the Function Descriptor + */ +struct fdesc { + uint64_t addr; + uint64_t gp; +}; + +#endif /* _ASM_IA64_ELF_H */ diff --git a/arch/ia64/include/asm/emergency-restart.h b/arch/ia64/include/asm/emergency-restart.h new file mode 100644 index 000000000000..108d8c48e42e --- /dev/null +++ b/arch/ia64/include/asm/emergency-restart.h @@ -0,0 +1,6 @@ +#ifndef _ASM_EMERGENCY_RESTART_H +#define _ASM_EMERGENCY_RESTART_H + +#include + +#endif /* _ASM_EMERGENCY_RESTART_H */ diff --git a/arch/ia64/include/asm/esi.h b/arch/ia64/include/asm/esi.h new file mode 100644 index 000000000000..56d1310af06e --- /dev/null +++ b/arch/ia64/include/asm/esi.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * ESI service calls. + * + * Copyright (c) Copyright 2005-2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + */ +#ifndef esi_h +#define esi_h + +#include + +#define ESI_QUERY 0x00000001 +#define ESI_OPEN_HANDLE 0x02000000 +#define ESI_CLOSE_HANDLE 0x02000001 + +enum esi_proc_type { + ESI_PROC_SERIALIZED, /* calls need to be serialized */ + ESI_PROC_MP_SAFE, /* MP-safe, but not reentrant */ + ESI_PROC_REENTRANT /* MP-safe and reentrant */ +}; + +extern struct ia64_sal_retval esi_call_phys (void *, u64 *); +extern int ia64_esi_call(efi_guid_t, struct ia64_sal_retval *, + enum esi_proc_type, + u64, u64, u64, u64, u64, u64, u64, u64); +extern int ia64_esi_call_phys(efi_guid_t, struct ia64_sal_retval *, u64, u64, + u64, u64, u64, u64, u64, u64); + +#endif /* esi_h */ diff --git a/arch/ia64/include/asm/exception.h b/arch/ia64/include/asm/exception.h new file mode 100644 index 000000000000..1d5df8116a31 --- /dev/null +++ b/arch/ia64/include/asm/exception.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +#ifndef __ASM_EXCEPTION_H +#define __ASM_EXCEPTION_H + +struct pt_regs; +struct exception_table_entry; + +extern void ia64_handle_exception(struct pt_regs *regs, + const struct exception_table_entry *e); + +#define ia64_done_with_exception(regs) \ +({ \ + int __ex_ret = 0; \ + const struct exception_table_entry *e; \ + e = search_exception_tables((regs)->cr_iip + ia64_psr(regs)->ri); \ + if (e) { \ + ia64_handle_exception(regs, e); \ + __ex_ret = 1; \ + } \ + __ex_ret; \ +}) + +#endif /* __ASM_EXCEPTION_H */ diff --git a/arch/ia64/include/asm/extable.h b/arch/ia64/include/asm/extable.h new file mode 100644 index 000000000000..83eac6aa0639 --- /dev/null +++ b/arch/ia64/include/asm/extable.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_EXTABLE_H +#define _ASM_IA64_EXTABLE_H + +#define ARCH_HAS_RELATIVE_EXTABLE + +struct exception_table_entry { + int insn; /* location-relative address of insn this fixup is for */ + int fixup; /* location-relative continuation addr.; if bit 2 is set, r9 is set to 0 */ +}; + +#endif diff --git a/arch/ia64/include/asm/fb.h b/arch/ia64/include/asm/fb.h new file mode 100644 index 000000000000..1717b26fd423 --- /dev/null +++ b/arch/ia64/include/asm/fb.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_FB_H_ +#define _ASM_FB_H_ + +#include +#include +#include + +#include + +struct file; + +static inline void fb_pgprotect(struct file *file, struct vm_area_struct *vma, + unsigned long off) +{ + if (efi_range_is_wc(vma->vm_start, vma->vm_end - vma->vm_start)) + vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot); + else + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +} +#define fb_pgprotect fb_pgprotect + +static inline void fb_memcpy_fromio(void *to, const volatile void __iomem *from, size_t n) +{ + memcpy(to, (void __force *)from, n); +} +#define fb_memcpy_fromio fb_memcpy_fromio + +static inline void fb_memcpy_toio(volatile void __iomem *to, const void *from, size_t n) +{ + memcpy((void __force *)to, from, n); +} +#define fb_memcpy_toio fb_memcpy_toio + +static inline void fb_memset_io(volatile void __iomem *addr, int c, size_t n) +{ + memset((void __force *)addr, c, n); +} +#define fb_memset fb_memset_io + +#include + +#endif /* _ASM_FB_H_ */ diff --git a/arch/ia64/include/asm/fpswa.h b/arch/ia64/include/asm/fpswa.h new file mode 100644 index 000000000000..2a0c23728b26 --- /dev/null +++ b/arch/ia64/include/asm/fpswa.h @@ -0,0 +1,74 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_FPSWA_H +#define _ASM_IA64_FPSWA_H + +/* + * Floating-point Software Assist + * + * Copyright (C) 1999 Intel Corporation. + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Goutham Rao + */ + +typedef struct { + /* 4 * 128 bits */ + unsigned long fp_lp[4*2]; +} fp_state_low_preserved_t; + +typedef struct { + /* 10 * 128 bits */ + unsigned long fp_lv[10 * 2]; +} fp_state_low_volatile_t; + +typedef struct { + /* 16 * 128 bits */ + unsigned long fp_hp[16 * 2]; +} fp_state_high_preserved_t; + +typedef struct { + /* 96 * 128 bits */ + unsigned long fp_hv[96 * 2]; +} fp_state_high_volatile_t; + +/** + * floating point state to be passed to the FP emulation library by + * the trap/fault handler + */ +typedef struct { + unsigned long bitmask_low64; + unsigned long bitmask_high64; + fp_state_low_preserved_t *fp_state_low_preserved; + fp_state_low_volatile_t *fp_state_low_volatile; + fp_state_high_preserved_t *fp_state_high_preserved; + fp_state_high_volatile_t *fp_state_high_volatile; +} fp_state_t; + +typedef struct { + unsigned long status; + unsigned long err0; + unsigned long err1; + unsigned long err2; +} fpswa_ret_t; + +/** + * function header for the Floating Point software assist + * library. This function is invoked by the Floating point software + * assist trap/fault handler. + */ +typedef fpswa_ret_t (*efi_fpswa_t) (unsigned long trap_type, void *bundle, unsigned long *ipsr, + unsigned long *fsr, unsigned long *isr, unsigned long *preds, + unsigned long *ifs, fp_state_t *fp_state); + +/** + * This is the FPSWA library interface as defined by EFI. We need to pass a + * pointer to the interface itself on a call to the assist library + */ +typedef struct { + unsigned int revision; + unsigned int reserved; + efi_fpswa_t fpswa; +} fpswa_interface_t; + +extern fpswa_interface_t *fpswa_interface; + +#endif /* _ASM_IA64_FPSWA_H */ diff --git a/arch/ia64/include/asm/ftrace.h b/arch/ia64/include/asm/ftrace.h new file mode 100644 index 000000000000..a07a8e575453 --- /dev/null +++ b/arch/ia64/include/asm/ftrace.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_FTRACE_H +#define _ASM_IA64_FTRACE_H + +#ifdef CONFIG_FUNCTION_TRACER +#define MCOUNT_INSN_SIZE 32 /* sizeof mcount call */ + +#ifndef __ASSEMBLY__ +extern void _mcount(unsigned long pfs, unsigned long r1, unsigned long b0, unsigned long r0); +#define mcount _mcount + +/* In IA64, MCOUNT_ADDR is set in link time, so it's not a constant at compile time */ +#define MCOUNT_ADDR (((struct fnptr *)mcount)->ip) +#define FTRACE_ADDR (((struct fnptr *)ftrace_caller)->ip) + +static inline unsigned long ftrace_call_adjust(unsigned long addr) +{ + /* second bundle, insn 2 */ + return addr - 0x12; +} + +struct dyn_arch_ftrace { +}; +#endif + +#endif /* CONFIG_FUNCTION_TRACER */ + +#endif /* _ASM_IA64_FTRACE_H */ diff --git a/arch/ia64/include/asm/futex.h b/arch/ia64/include/asm/futex.h new file mode 100644 index 000000000000..1db26b432d8c --- /dev/null +++ b/arch/ia64/include/asm/futex.h @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_FUTEX_H +#define _ASM_FUTEX_H + +#include +#include +#include + +#define __futex_atomic_op1(insn, ret, oldval, uaddr, oparg) \ +do { \ + register unsigned long r8 __asm ("r8") = 0; \ + __asm__ __volatile__( \ + " mf;; \n" \ + "[1:] " insn ";; \n" \ + " .xdata4 \"__ex_table\", 1b-., 2f-. \n" \ + "[2:]" \ + : "+r" (r8), "=r" (oldval) \ + : "r" (uaddr), "r" (oparg) \ + : "memory"); \ + ret = r8; \ +} while (0) + +#define __futex_atomic_op2(insn, ret, oldval, uaddr, oparg) \ +do { \ + register unsigned long r8 __asm ("r8") = 0; \ + int val, newval; \ + do { \ + __asm__ __volatile__( \ + " mf;; \n" \ + "[1:] ld4 %3=[%4];; \n" \ + " mov %2=%3 \n" \ + insn ";; \n" \ + " mov ar.ccv=%2;; \n" \ + "[2:] cmpxchg4.acq %1=[%4],%3,ar.ccv;; \n" \ + " .xdata4 \"__ex_table\", 1b-., 3f-.\n" \ + " .xdata4 \"__ex_table\", 2b-., 3f-.\n" \ + "[3:]" \ + : "+r" (r8), "=r" (val), "=&r" (oldval), \ + "=&r" (newval) \ + : "r" (uaddr), "r" (oparg) \ + : "memory"); \ + if (unlikely (r8)) \ + break; \ + } while (unlikely (val != oldval)); \ + ret = r8; \ +} while (0) + +static inline int +arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr) +{ + int oldval = 0, ret; + + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op1("xchg4 %1=[%2],%3", ret, oldval, uaddr, + oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op2("add %3=%3,%5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op2("or %3=%3,%5", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op2("and %3=%3,%5", ret, oldval, uaddr, + ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op2("xor %3=%3,%5", ret, oldval, uaddr, oparg); + break; + default: + ret = -ENOSYS; + } + + if (!ret) + *oval = oldval; + + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + if (!access_ok(uaddr, sizeof(u32))) + return -EFAULT; + + { + register unsigned long r8 __asm ("r8") = 0; + unsigned long prev; + __asm__ __volatile__( + " mf;; \n" + " mov ar.ccv=%4;; \n" + "[1:] cmpxchg4.acq %1=[%2],%3,ar.ccv \n" + " .xdata4 \"__ex_table\", 1b-., 2f-. \n" + "[2:]" + : "+r" (r8), "=&r" (prev) + : "r" (uaddr), "r" (newval), + "rO" ((long) (unsigned) oldval) + : "memory"); + *uval = prev; + return r8; + } +} + +#endif /* _ASM_FUTEX_H */ diff --git a/arch/ia64/include/asm/gcc_intrin.h b/arch/ia64/include/asm/gcc_intrin.h new file mode 100644 index 000000000000..83f230b23867 --- /dev/null +++ b/arch/ia64/include/asm/gcc_intrin.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Copyright (C) 2002,2003 Jun Nakajima + * Copyright (C) 2002,2003 Suresh Siddha + */ +#ifndef _ASM_IA64_GCC_INTRIN_H +#define _ASM_IA64_GCC_INTRIN_H + +#include + +register unsigned long ia64_r13 asm ("r13") __used; +#endif /* _ASM_IA64_GCC_INTRIN_H */ diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h new file mode 100644 index 000000000000..ccde7c2ba00f --- /dev/null +++ b/arch/ia64/include/asm/hardirq.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_HARDIRQ_H +#define _ASM_IA64_HARDIRQ_H + +/* + * Modified 1998-2002, 2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* + * No irq_cpustat_t for IA-64. The data is held in the per-CPU data structure. + */ + +#define __ARCH_IRQ_STAT 1 + +#define local_softirq_pending_ref ia64_cpu_info.softirq_pending + +#include +#include + +#include + +extern void __iomem *ipi_base_addr; + +void ack_bad_irq(unsigned int irq); + +#endif /* _ASM_IA64_HARDIRQ_H */ diff --git a/arch/ia64/include/asm/hugetlb.h b/arch/ia64/include/asm/hugetlb.h new file mode 100644 index 000000000000..026ead47cd53 --- /dev/null +++ b/arch/ia64/include/asm/hugetlb.h @@ -0,0 +1,34 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_HUGETLB_H +#define _ASM_IA64_HUGETLB_H + +#include + +#define __HAVE_ARCH_HUGETLB_FREE_PGD_RANGE +void hugetlb_free_pgd_range(struct mmu_gather *tlb, unsigned long addr, + unsigned long end, unsigned long floor, + unsigned long ceiling); + +#define __HAVE_ARCH_PREPARE_HUGEPAGE_RANGE +int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len); + +static inline int is_hugepage_only_range(struct mm_struct *mm, + unsigned long addr, + unsigned long len) +{ + return (REGION_NUMBER(addr) == RGN_HPAGE || + REGION_NUMBER((addr)+(len)-1) == RGN_HPAGE); +} +#define is_hugepage_only_range is_hugepage_only_range + +#define __HAVE_ARCH_HUGE_PTEP_CLEAR_FLUSH +static inline pte_t huge_ptep_clear_flush(struct vm_area_struct *vma, + unsigned long addr, pte_t *ptep) +{ + return *ptep; +} + +#include + +#endif /* _ASM_IA64_HUGETLB_H */ diff --git a/arch/ia64/include/asm/hw_irq.h b/arch/ia64/include/asm/hw_irq.h new file mode 100644 index 000000000000..5d267132f8cb --- /dev/null +++ b/arch/ia64/include/asm/hw_irq.h @@ -0,0 +1,167 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_HW_IRQ_H +#define _ASM_IA64_HW_IRQ_H + +/* + * Copyright (C) 2001-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include +#include +#include + +#include +#include + +typedef u8 ia64_vector; + +/* + * 0 special + * + * 1,3-14 are reserved from firmware + * + * 16-255 (vectored external interrupts) are available + * + * 15 spurious interrupt (see IVR) + * + * 16 lowest priority, 255 highest priority + * + * 15 classes of 16 interrupts each. + */ +#define IA64_MIN_VECTORED_IRQ 16 +#define IA64_MAX_VECTORED_IRQ 255 +#define IA64_NUM_VECTORS 256 + +#define AUTO_ASSIGN -1 + +#define IA64_SPURIOUS_INT_VECTOR 0x0f + +/* + * Vectors 0x10-0x1f are used for low priority interrupts, e.g. CMCI. + */ +#define IA64_CPEP_VECTOR 0x1c /* corrected platform error polling vector */ +#define IA64_CMCP_VECTOR 0x1d /* corrected machine-check polling vector */ +#define IA64_CPE_VECTOR 0x1e /* corrected platform error interrupt vector */ +#define IA64_CMC_VECTOR 0x1f /* corrected machine-check interrupt vector */ +/* + * Vectors 0x20-0x2f are reserved for legacy ISA IRQs. + * Use vectors 0x30-0xe7 as the default device vector range for ia64. + * Platforms may choose to reduce this range in platform_irq_setup, but the + * platform range must fall within + * [IA64_DEF_FIRST_DEVICE_VECTOR..IA64_DEF_LAST_DEVICE_VECTOR] + */ +extern int ia64_first_device_vector; +extern int ia64_last_device_vector; + +#ifdef CONFIG_SMP +/* Reserve the lower priority vector than device vectors for "move IRQ" IPI */ +#define IA64_IRQ_MOVE_VECTOR 0x30 /* "move IRQ" IPI */ +#define IA64_DEF_FIRST_DEVICE_VECTOR 0x31 +#else +#define IA64_DEF_FIRST_DEVICE_VECTOR 0x30 +#endif +#define IA64_DEF_LAST_DEVICE_VECTOR 0xe7 +#define IA64_FIRST_DEVICE_VECTOR ia64_first_device_vector +#define IA64_LAST_DEVICE_VECTOR ia64_last_device_vector +#define IA64_MAX_DEVICE_VECTORS (IA64_DEF_LAST_DEVICE_VECTOR - IA64_DEF_FIRST_DEVICE_VECTOR + 1) +#define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1) + +#define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */ +#define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */ +#define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */ +#define IA64_IPI_LOCAL_TLB_FLUSH 0xfc /* SMP flush local TLB */ +#define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */ +#define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */ + +/* Used for encoding redirected irqs */ + +#define IA64_IRQ_REDIRECTED (1 << 31) + +/* IA64 inter-cpu interrupt related definitions */ + +#define IA64_IPI_DEFAULT_BASE_ADDR 0xfee00000 + +/* Delivery modes for inter-cpu interrupts */ +enum { + IA64_IPI_DM_INT = 0x0, /* pend an external interrupt */ + IA64_IPI_DM_PMI = 0x2, /* pend a PMI */ + IA64_IPI_DM_NMI = 0x4, /* pend an NMI (vector 2) */ + IA64_IPI_DM_INIT = 0x5, /* pend an INIT interrupt */ + IA64_IPI_DM_EXTINT = 0x7, /* pend an 8259-compatible interrupt. */ +}; + +extern __u8 isa_irq_to_vector_map[16]; +#define isa_irq_to_vector(x) isa_irq_to_vector_map[(x)] + +struct irq_cfg { + ia64_vector vector; + cpumask_t domain; + cpumask_t old_domain; + unsigned move_cleanup_count; + u8 move_in_progress : 1; +}; +extern spinlock_t vector_lock; +extern struct irq_cfg irq_cfg[NR_IRQS]; +#define irq_to_domain(x) irq_cfg[(x)].domain +DECLARE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq); + +extern struct irq_chip irq_type_ia64_lsapic; /* CPU-internal interrupt controller */ + +#define ia64_register_ipi ia64_native_register_ipi +#define assign_irq_vector ia64_native_assign_irq_vector +#define free_irq_vector ia64_native_free_irq_vector +#define ia64_resend_irq ia64_native_resend_irq + +extern void ia64_native_register_ipi(void); +extern int bind_irq_vector(int irq, int vector, cpumask_t domain); +extern int ia64_native_assign_irq_vector (int irq); /* allocate a free vector */ +extern void ia64_native_free_irq_vector (int vector); +extern int reserve_irq_vector (int vector); +extern void __setup_vector_irq(int cpu); +extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect); +extern void destroy_and_reserve_irq (unsigned int irq); + +#ifdef CONFIG_SMP +extern int irq_prepare_move(int irq, int cpu); +extern void irq_complete_move(unsigned int irq); +#else +static inline int irq_prepare_move(int irq, int cpu) { return 0; } +static inline void irq_complete_move(unsigned int irq) {} +#endif + +static inline void ia64_native_resend_irq(unsigned int vector) +{ + ia64_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0); +} + +/* + * Next follows the irq descriptor interface. On IA-64, each CPU supports 256 interrupt + * vectors. On smaller systems, there is a one-to-one correspondence between interrupt + * vectors and the Linux irq numbers. However, larger systems may have multiple interrupt + * domains meaning that the translation from vector number to irq number depends on the + * interrupt domain that a CPU belongs to. This API abstracts such platform-dependent + * differences and provides a uniform means to translate between vector and irq numbers + * and to obtain the irq descriptor for a given irq number. + */ + +/* Extract the IA-64 vector that corresponds to IRQ. */ +static inline ia64_vector +irq_to_vector (int irq) +{ + return irq_cfg[irq].vector; +} + +/* + * Convert the local IA-64 vector to the corresponding irq number. This translation is + * done in the context of the interrupt domain that the currently executing CPU belongs + * to. + */ +static inline unsigned int +local_vector_to_irq (ia64_vector vec) +{ + return __this_cpu_read(vector_irq[vec]); +} + +#endif /* _ASM_IA64_HW_IRQ_H */ diff --git a/arch/ia64/include/asm/idle.h b/arch/ia64/include/asm/idle.h new file mode 100644 index 000000000000..97c55b97e0ba --- /dev/null +++ b/arch/ia64/include/asm/idle.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_IDLE_H +#define _ASM_IA64_IDLE_H + +static inline void enter_idle(void) { } +static inline void exit_idle(void) { } + +#endif /* _ASM_IA64_IDLE_H */ diff --git a/arch/ia64/include/asm/intrinsics.h b/arch/ia64/include/asm/intrinsics.h new file mode 100644 index 000000000000..035b17fe12ef --- /dev/null +++ b/arch/ia64/include/asm/intrinsics.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Compiler-dependent intrinsics. + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _ASM_IA64_INTRINSICS_H +#define _ASM_IA64_INTRINSICS_H + +#include + +#endif /* _ASM_IA64_INTRINSICS_H */ diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h new file mode 100644 index 000000000000..eedc0afa8cad --- /dev/null +++ b/arch/ia64/include/asm/io.h @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_IO_H +#define _ASM_IA64_IO_H + +/* + * This file contains the definitions for the emulated IO instructions + * inb/inw/inl/outb/outw/outl and the "string versions" of the same + * (insb/insw/insl/outsb/outsw/outsl). You can also use "pausing" + * versions of the single-IO instructions (inb_p/inw_p/..). + * + * This file is not meant to be obfuscating: it's just complicated to + * (a) handle it all in a way that makes gcc able to optimize it as + * well as possible and (b) trying to avoid writing the same thing + * over and over again with slight variations and possibly making a + * mistake somewhere. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ + +#include +#include + +#define __IA64_UNCACHED_OFFSET RGN_BASE(RGN_UNCACHED) + +/* + * The legacy I/O space defined by the ia64 architecture supports only 65536 ports, but + * large machines may have multiple other I/O spaces so we can't place any a priori limit + * on IO_SPACE_LIMIT. These additional spaces are described in ACPI. + */ +#define IO_SPACE_LIMIT 0xffffffffffffffffUL + +#define MAX_IO_SPACES_BITS 8 +#define MAX_IO_SPACES (1UL << MAX_IO_SPACES_BITS) +#define IO_SPACE_BITS 24 +#define IO_SPACE_SIZE (1UL << IO_SPACE_BITS) + +#define IO_SPACE_NR(port) ((port) >> IO_SPACE_BITS) +#define IO_SPACE_BASE(space) ((space) << IO_SPACE_BITS) +#define IO_SPACE_PORT(port) ((port) & (IO_SPACE_SIZE - 1)) + +#define IO_SPACE_SPARSE_ENCODING(p) ((((p) >> 2) << 12) | ((p) & 0xfff)) + +struct io_space { + unsigned long mmio_base; /* base in MMIO space */ + int sparse; +}; + +extern struct io_space io_space[]; +extern unsigned int num_io_spaces; + +# ifdef __KERNEL__ + +/* + * All MMIO iomem cookies are in region 6; anything less is a PIO cookie: + * 0xCxxxxxxxxxxxxxxx MMIO cookie (return from ioremap) + * 0x000000001SPPPPPP PIO cookie (S=space number, P..P=port) + * + * ioread/writeX() uses the leading 1 in PIO cookies (PIO_OFFSET) to catch + * code that uses bare port numbers without the prerequisite pci_iomap(). + */ +#define PIO_OFFSET (1UL << (MAX_IO_SPACES_BITS + IO_SPACE_BITS)) +#define PIO_MASK (PIO_OFFSET - 1) +#define PIO_RESERVED __IA64_UNCACHED_OFFSET +#define HAVE_ARCH_PIO_SIZE + +#include +#include +#include + +/* + * Change virtual addresses to physical addresses and vv. + */ +static inline unsigned long +virt_to_phys (volatile void *address) +{ + return (unsigned long) address - PAGE_OFFSET; +} +#define virt_to_phys virt_to_phys + +static inline void* +phys_to_virt (unsigned long address) +{ + return (void *) (address + PAGE_OFFSET); +} +#define phys_to_virt phys_to_virt + +#define ARCH_HAS_VALID_PHYS_ADDR_RANGE +extern u64 kern_mem_attribute (unsigned long phys_addr, unsigned long size); +extern int valid_phys_addr_range (phys_addr_t addr, size_t count); /* efi.c */ +extern int valid_mmap_phys_addr_range (unsigned long pfn, size_t count); + +# endif /* KERNEL */ + +/* + * Memory fence w/accept. This should never be used in code that is + * not IA-64 specific. + */ +#define __ia64_mf_a() ia64_mfa() + +static inline void* +__ia64_mk_io_addr (unsigned long port) +{ + struct io_space *space; + unsigned long offset; + + space = &io_space[IO_SPACE_NR(port)]; + port = IO_SPACE_PORT(port); + if (space->sparse) + offset = IO_SPACE_SPARSE_ENCODING(port); + else + offset = port; + + return (void *) (space->mmio_base | offset); +} + +/* + * For the in/out routines, we need to do "mf.a" _after_ doing the I/O access to ensure + * that the access has completed before executing other I/O accesses. Since we're doing + * the accesses through an uncachable (UC) translation, the CPU will execute them in + * program order. However, we still need to tell the compiler not to shuffle them around + * during optimization, which is why we use "volatile" pointers. + */ + +#define inb inb +static inline unsigned int inb(unsigned long port) +{ + volatile unsigned char *addr = __ia64_mk_io_addr(port); + unsigned char ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +#define inw inw +static inline unsigned int inw(unsigned long port) +{ + volatile unsigned short *addr = __ia64_mk_io_addr(port); + unsigned short ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +#define inl inl +static inline unsigned int inl(unsigned long port) +{ + volatile unsigned int *addr = __ia64_mk_io_addr(port); + unsigned int ret; + + ret = *addr; + __ia64_mf_a(); + return ret; +} + +#define outb outb +static inline void outb(unsigned char val, unsigned long port) +{ + volatile unsigned char *addr = __ia64_mk_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +#define outw outw +static inline void outw(unsigned short val, unsigned long port) +{ + volatile unsigned short *addr = __ia64_mk_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +#define outl outl +static inline void outl(unsigned int val, unsigned long port) +{ + volatile unsigned int *addr = __ia64_mk_io_addr(port); + + *addr = val; + __ia64_mf_a(); +} + +#define insb insb +static inline void insb(unsigned long port, void *dst, unsigned long count) +{ + unsigned char *dp = dst; + + while (count--) + *dp++ = inb(port); +} + +#define insw insw +static inline void insw(unsigned long port, void *dst, unsigned long count) +{ + unsigned short *dp = dst; + + while (count--) + put_unaligned(inw(port), dp++); +} + +#define insl insl +static inline void insl(unsigned long port, void *dst, unsigned long count) +{ + unsigned int *dp = dst; + + while (count--) + put_unaligned(inl(port), dp++); +} + +#define outsb outsb +static inline void outsb(unsigned long port, const void *src, + unsigned long count) +{ + const unsigned char *sp = src; + + while (count--) + outb(*sp++, port); +} + +#define outsw outsw +static inline void outsw(unsigned long port, const void *src, + unsigned long count) +{ + const unsigned short *sp = src; + + while (count--) + outw(get_unaligned(sp++), port); +} + +#define outsl outsl +static inline void outsl(unsigned long port, const void *src, + unsigned long count) +{ + const unsigned int *sp = src; + + while (count--) + outl(get_unaligned(sp++), port); +} + +# ifdef __KERNEL__ + +#define _PAGE_IOREMAP pgprot_val(PAGE_KERNEL) + +extern void __iomem * ioremap_uc(unsigned long offset, unsigned long size); + +#define ioremap_prot ioremap_prot +#define ioremap_cache ioremap +#define ioremap_uc ioremap_uc +#define iounmap iounmap + +/* + * String version of IO memory access ops: + */ +extern void memcpy_fromio(void *dst, const volatile void __iomem *src, long n); +extern void memcpy_toio(volatile void __iomem *dst, const void *src, long n); +extern void memset_io(volatile void __iomem *s, int c, long n); + +#define memcpy_fromio memcpy_fromio +#define memcpy_toio memcpy_toio +#define memset_io memset_io +#define xlate_dev_mem_ptr xlate_dev_mem_ptr +#include +#undef PCI_IOBASE + +# endif /* __KERNEL__ */ + +#endif /* _ASM_IA64_IO_H */ diff --git a/arch/ia64/include/asm/iommu.h b/arch/ia64/include/asm/iommu.h new file mode 100644 index 000000000000..eb0db20c9d4c --- /dev/null +++ b/arch/ia64/include/asm/iommu.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_IOMMU_H +#define _ASM_IA64_IOMMU_H 1 + +#include + +/* 10 seconds */ +#define DMAR_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) + +extern void no_iommu_init(void); +#ifdef CONFIG_INTEL_IOMMU +extern int force_iommu, no_iommu; +extern int iommu_detected; + +static inline int __init +arch_rmrr_sanity_check(struct acpi_dmar_reserved_memory *rmrr) { return 0; } +#else +#define no_iommu (1) +#define iommu_detected (0) +#endif + +#endif diff --git a/arch/ia64/include/asm/iosapic.h b/arch/ia64/include/asm/iosapic.h new file mode 100644 index 000000000000..a91aeb413e17 --- /dev/null +++ b/arch/ia64/include/asm/iosapic.h @@ -0,0 +1,106 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_IA64_IOSAPIC_H +#define __ASM_IA64_IOSAPIC_H + +#define IOSAPIC_REG_SELECT 0x0 +#define IOSAPIC_WINDOW 0x10 +#define IOSAPIC_EOI 0x40 + +#define IOSAPIC_VERSION 0x1 + +/* + * Redirection table entry + */ +#define IOSAPIC_RTE_LOW(i) (0x10+i*2) +#define IOSAPIC_RTE_HIGH(i) (0x11+i*2) + +#define IOSAPIC_DEST_SHIFT 16 + +/* + * Delivery mode + */ +#define IOSAPIC_DELIVERY_SHIFT 8 +#define IOSAPIC_FIXED 0x0 +#define IOSAPIC_LOWEST_PRIORITY 0x1 +#define IOSAPIC_PMI 0x2 +#define IOSAPIC_NMI 0x4 +#define IOSAPIC_INIT 0x5 +#define IOSAPIC_EXTINT 0x7 + +/* + * Interrupt polarity + */ +#define IOSAPIC_POLARITY_SHIFT 13 +#define IOSAPIC_POL_HIGH 0 +#define IOSAPIC_POL_LOW 1 + +/* + * Trigger mode + */ +#define IOSAPIC_TRIGGER_SHIFT 15 +#define IOSAPIC_EDGE 0 +#define IOSAPIC_LEVEL 1 + +/* + * Mask bit + */ + +#define IOSAPIC_MASK_SHIFT 16 +#define IOSAPIC_MASK (1< + * Stephane Eranian + * + * 11/24/98 S.Eranian updated TIMER_IRQ and irq_canonicalize + * 01/20/99 S.Eranian added keyboard interrupt + * 02/29/00 D.Mosberger moved most things into hw_irq.h + */ + +#include +#include +#include + +#define NR_IRQS IA64_NATIVE_NR_IRQS + +static __inline__ int +irq_canonicalize (int irq) +{ + /* + * We do the legacy thing here of pretending that irqs < 16 + * are 8259 irqs. This really shouldn't be necessary at all, + * but we keep it here as serial.c still uses it... + */ + return ((irq == 2) ? 9 : irq); +} + +extern void set_irq_affinity_info (unsigned int irq, int dest, int redir); + +int create_irq(void); +void destroy_irq(unsigned int irq); + +#endif /* _ASM_IA64_IRQ_H */ diff --git a/arch/ia64/include/asm/irq_regs.h b/arch/ia64/include/asm/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/arch/ia64/include/asm/irq_regs.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/irq_remapping.h b/arch/ia64/include/asm/irq_remapping.h new file mode 100644 index 000000000000..547a6e87018c --- /dev/null +++ b/arch/ia64/include/asm/irq_remapping.h @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __IA64_INTR_REMAPPING_H +#define __IA64_INTR_REMAPPING_H +#define irq_remapping_enabled 0 +#endif diff --git a/arch/ia64/include/asm/irqflags.h b/arch/ia64/include/asm/irqflags.h new file mode 100644 index 000000000000..1dc30f12e545 --- /dev/null +++ b/arch/ia64/include/asm/irqflags.h @@ -0,0 +1,95 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IRQ flags defines. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ + +#ifndef _ASM_IA64_IRQFLAGS_H +#define _ASM_IA64_IRQFLAGS_H + +#include +#include + +#ifdef CONFIG_IA64_DEBUG_IRQ +extern unsigned long last_cli_ip; +static inline void arch_maybe_save_ip(unsigned long flags) +{ + if (flags & IA64_PSR_I) + last_cli_ip = ia64_getreg(_IA64_REG_IP); +} +#else +#define arch_maybe_save_ip(flags) do {} while (0) +#endif + +/* + * - clearing psr.i is implicitly serialized (visible by next insn) + * - setting psr.i requires data serialization + * - we need a stop-bit before reading PSR because we sometimes + * write a floating-point register right before reading the PSR + * and that writes to PSR.mfl + */ + +static inline unsigned long arch_local_save_flags(void) +{ + ia64_stop(); + return ia64_getreg(_IA64_REG_PSR); +} + +static inline unsigned long arch_local_irq_save(void) +{ + unsigned long flags = arch_local_save_flags(); + + ia64_stop(); + ia64_rsm(IA64_PSR_I); + arch_maybe_save_ip(flags); + return flags; +} + +static inline void arch_local_irq_disable(void) +{ +#ifdef CONFIG_IA64_DEBUG_IRQ + arch_local_irq_save(); +#else + ia64_stop(); + ia64_rsm(IA64_PSR_I); +#endif +} + +static inline void arch_local_irq_enable(void) +{ + ia64_stop(); + ia64_ssm(IA64_PSR_I); + ia64_srlz_d(); +} + +static inline void arch_local_irq_restore(unsigned long flags) +{ +#ifdef CONFIG_IA64_DEBUG_IRQ + unsigned long old_psr = arch_local_save_flags(); +#endif + ia64_intrin_local_irq_restore(flags & IA64_PSR_I); + arch_maybe_save_ip(old_psr & ~flags); +} + +static inline bool arch_irqs_disabled_flags(unsigned long flags) +{ + return (flags & IA64_PSR_I) == 0; +} + +static inline bool arch_irqs_disabled(void) +{ + return arch_irqs_disabled_flags(arch_local_save_flags()); +} + +static inline void arch_safe_halt(void) +{ + arch_local_irq_enable(); + ia64_pal_halt_light(); /* PAL_HALT_LIGHT */ +} + + +#endif /* _ASM_IA64_IRQFLAGS_H */ diff --git a/arch/ia64/include/asm/kdebug.h b/arch/ia64/include/asm/kdebug.h new file mode 100644 index 000000000000..4f7e6dc974bc --- /dev/null +++ b/arch/ia64/include/asm/kdebug.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _IA64_KDEBUG_H +#define _IA64_KDEBUG_H 1 +/* + * + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch and Anil S Keshavamurthy + * adopted from + * include/asm-x86_64/kdebug.h + * + * 2005-Oct Keith Owens . Expand notify_die to cover more + * events. + */ + +enum die_val { + DIE_BREAK = 1, + DIE_FAULT, + DIE_OOPS, + DIE_MACHINE_HALT, + DIE_MACHINE_RESTART, + DIE_MCA_MONARCH_ENTER, + DIE_MCA_MONARCH_PROCESS, + DIE_MCA_MONARCH_LEAVE, + DIE_MCA_SLAVE_ENTER, + DIE_MCA_SLAVE_PROCESS, + DIE_MCA_SLAVE_LEAVE, + DIE_MCA_RENDZVOUS_ENTER, + DIE_MCA_RENDZVOUS_PROCESS, + DIE_MCA_RENDZVOUS_LEAVE, + DIE_MCA_NEW_TIMEOUT, + DIE_INIT_ENTER, + DIE_INIT_MONARCH_ENTER, + DIE_INIT_MONARCH_PROCESS, + DIE_INIT_MONARCH_LEAVE, + DIE_INIT_SLAVE_ENTER, + DIE_INIT_SLAVE_PROCESS, + DIE_INIT_SLAVE_LEAVE, + DIE_KDEBUG_ENTER, + DIE_KDEBUG_LEAVE, + DIE_KDUMP_ENTER, + DIE_KDUMP_LEAVE, +}; + +#endif diff --git a/arch/ia64/include/asm/kexec.h b/arch/ia64/include/asm/kexec.h new file mode 100644 index 000000000000..294b1e1ebd2d --- /dev/null +++ b/arch/ia64/include/asm/kexec.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_KEXEC_H +#define _ASM_IA64_KEXEC_H + +#include + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT TASK_SIZE + +#define KEXEC_CONTROL_PAGE_SIZE (8192 + 8192 + 4096) + +/* The native architecture */ +#define KEXEC_ARCH KEXEC_ARCH_IA_64 + +#define kexec_flush_icache_page(page) do { \ + unsigned long page_addr = (unsigned long)page_address(page); \ + flush_icache_range(page_addr, page_addr + PAGE_SIZE); \ + } while(0) + +extern struct kimage *ia64_kimage; +extern const unsigned int relocate_new_kernel_size; +extern void relocate_new_kernel(unsigned long, unsigned long, + struct ia64_boot_param *, unsigned long); +static inline void +crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) +{ +} +extern struct resource efi_memmap_res; +extern struct resource boot_param_res; +extern void kdump_smp_send_stop(void); +extern void kdump_smp_send_init(void); +extern void kexec_disable_iosapic(void); +extern void crash_save_this_cpu(void); +struct rsvd_region; +extern unsigned long kdump_find_rsvd_region(unsigned long size, + struct rsvd_region *rsvd_regions, int n); +extern void kdump_cpu_freeze(struct unw_frame_info *info, void *arg); +extern int kdump_status[]; +extern atomic_t kdump_cpu_freezed; +extern atomic_t kdump_in_progress; + +#endif /* _ASM_IA64_KEXEC_H */ diff --git a/arch/ia64/include/asm/kprobes.h b/arch/ia64/include/asm/kprobes.h new file mode 100644 index 000000000000..9e956768946c --- /dev/null +++ b/arch/ia64/include/asm/kprobes.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +#ifndef _ASM_KPROBES_H +#define _ASM_KPROBES_H +/* + * Kernel Probes (KProbes) + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch and Anil S Keshavamurthy + * adapted from i386 + */ +#include +#include + +#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) + +#ifdef CONFIG_KPROBES + +#include +#include +#include + +#define __ARCH_WANT_KPROBES_INSN_SLOT +#define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */ +#define NOP_M_INST (long)(1<<27) +#define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \ + (0x1L << 12) | /* many */ \ + (((i1) & 1) << 36) | ((i2) << 13))) /* imm */ + +typedef union cmp_inst { + struct { + unsigned long long qp : 6; + unsigned long long p1 : 6; + unsigned long long c : 1; + unsigned long long r2 : 7; + unsigned long long r3 : 7; + unsigned long long p2 : 6; + unsigned long long ta : 1; + unsigned long long x2 : 2; + unsigned long long tb : 1; + unsigned long long opcode : 4; + unsigned long long reserved : 23; + }f; + unsigned long long l; +} cmp_inst_t; + +struct kprobe; + +typedef struct _bundle { + struct { + unsigned long long template : 5; + unsigned long long slot0 : 41; + unsigned long long slot1_p0 : 64-46; + } quad0; + struct { + unsigned long long slot1_p1 : 41 - (64-46); + unsigned long long slot2 : 41; + } quad1; +} __attribute__((__aligned__(16))) bundle_t; + +struct prev_kprobe { + struct kprobe *kp; + unsigned long status; +}; + +#define MAX_PARAM_RSE_SIZE (0x60+0x60/0x3f) +/* per-cpu kprobe control block */ +#define ARCH_PREV_KPROBE_SZ 2 +struct kprobe_ctlblk { + unsigned long kprobe_status; + unsigned long *bsp; + unsigned long cfm; + atomic_t prev_kprobe_index; + struct prev_kprobe prev_kprobe[ARCH_PREV_KPROBE_SZ]; +}; + +#define kretprobe_blacklist_size 0 + +#define SLOT0_OPCODE_SHIFT (37) +#define SLOT1_p1_OPCODE_SHIFT (37 - (64-46)) +#define SLOT2_OPCODE_SHIFT (37) + +#define INDIRECT_CALL_OPCODE (1) +#define IP_RELATIVE_CALL_OPCODE (5) +#define IP_RELATIVE_BRANCH_OPCODE (4) +#define IP_RELATIVE_PREDICT_OPCODE (7) +#define LONG_BRANCH_OPCODE (0xC) +#define LONG_CALL_OPCODE (0xD) +#define flush_insn_slot(p) do { } while (0) + +typedef struct kprobe_opcode { + bundle_t bundle; +} kprobe_opcode_t; + +/* Architecture specific copy of original instruction*/ +struct arch_specific_insn { + /* copy of the instruction to be emulated */ + kprobe_opcode_t *insn; + #define INST_FLAG_FIX_RELATIVE_IP_ADDR 1 + #define INST_FLAG_FIX_BRANCH_REG 2 + #define INST_FLAG_BREAK_INST 4 + #define INST_FLAG_BOOSTABLE 8 + unsigned long inst_flag; + unsigned short target_br_reg; + unsigned short slot; +}; + +extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr); +extern int kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data); + +extern void arch_remove_kprobe(struct kprobe *p); + +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_KPROBES_H */ diff --git a/arch/ia64/include/asm/kregs.h b/arch/ia64/include/asm/kregs.h new file mode 100644 index 000000000000..44113b75e4eb --- /dev/null +++ b/arch/ia64/include/asm/kregs.h @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_KREGS_H +#define _ASM_IA64_KREGS_H + +/* + * Copyright (C) 2001-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * This file defines the kernel register usage convention used by Linux/ia64. + */ + +/* + * Kernel registers: + */ +#define IA64_KR_IO_BASE 0 /* ar.k0: legacy I/O base address */ +#define IA64_KR_TSSD 1 /* ar.k1: IVE uses this as the TSSD */ +#define IA64_KR_PER_CPU_DATA 3 /* ar.k3: physical per-CPU base */ +#define IA64_KR_CURRENT_STACK 4 /* ar.k4: what's mapped in IA64_TR_CURRENT_STACK */ +#define IA64_KR_FPU_OWNER 5 /* ar.k5: fpu-owner (UP only, at the moment) */ +#define IA64_KR_CURRENT 6 /* ar.k6: "current" task pointer */ +#define IA64_KR_PT_BASE 7 /* ar.k7: page table base address (physical) */ + +#define _IA64_KR_PASTE(x,y) x##y +#define _IA64_KR_PREFIX(n) _IA64_KR_PASTE(ar.k, n) +#define IA64_KR(n) _IA64_KR_PREFIX(IA64_KR_##n) + +/* + * Translation registers: + */ +#define IA64_TR_KERNEL 0 /* itr0, dtr0: maps kernel image (code & data) */ +#define IA64_TR_PALCODE 1 /* itr1: maps PALcode as required by EFI */ +#define IA64_TR_CURRENT_STACK 1 /* dtr1: maps kernel's memory- & register-stacks */ + +#define IA64_TR_ALLOC_BASE 2 /* itr&dtr: Base of dynamic TR resource*/ +#define IA64_TR_ALLOC_MAX 64 /* Max number for dynamic use*/ + +/* Processor status register bits: */ +#define IA64_PSR_BE_BIT 1 +#define IA64_PSR_UP_BIT 2 +#define IA64_PSR_AC_BIT 3 +#define IA64_PSR_MFL_BIT 4 +#define IA64_PSR_MFH_BIT 5 +#define IA64_PSR_IC_BIT 13 +#define IA64_PSR_I_BIT 14 +#define IA64_PSR_PK_BIT 15 +#define IA64_PSR_DT_BIT 17 +#define IA64_PSR_DFL_BIT 18 +#define IA64_PSR_DFH_BIT 19 +#define IA64_PSR_SP_BIT 20 +#define IA64_PSR_PP_BIT 21 +#define IA64_PSR_DI_BIT 22 +#define IA64_PSR_SI_BIT 23 +#define IA64_PSR_DB_BIT 24 +#define IA64_PSR_LP_BIT 25 +#define IA64_PSR_TB_BIT 26 +#define IA64_PSR_RT_BIT 27 +/* The following are not affected by save_flags()/restore_flags(): */ +#define IA64_PSR_CPL0_BIT 32 +#define IA64_PSR_CPL1_BIT 33 +#define IA64_PSR_IS_BIT 34 +#define IA64_PSR_MC_BIT 35 +#define IA64_PSR_IT_BIT 36 +#define IA64_PSR_ID_BIT 37 +#define IA64_PSR_DA_BIT 38 +#define IA64_PSR_DD_BIT 39 +#define IA64_PSR_SS_BIT 40 +#define IA64_PSR_RI_BIT 41 +#define IA64_PSR_ED_BIT 43 +#define IA64_PSR_BN_BIT 44 +#define IA64_PSR_IA_BIT 45 + +/* A mask of PSR bits that we generally don't want to inherit across a clone2() or an + execve(). Only list flags here that need to be cleared/set for BOTH clone2() and + execve(). */ +#define IA64_PSR_BITS_TO_CLEAR (IA64_PSR_MFL | IA64_PSR_MFH | IA64_PSR_DB | IA64_PSR_LP | \ + IA64_PSR_TB | IA64_PSR_ID | IA64_PSR_DA | IA64_PSR_DD | \ + IA64_PSR_SS | IA64_PSR_ED | IA64_PSR_IA) +#define IA64_PSR_BITS_TO_SET (IA64_PSR_DFH | IA64_PSR_SP) + +#define IA64_PSR_BE (__IA64_UL(1) << IA64_PSR_BE_BIT) +#define IA64_PSR_UP (__IA64_UL(1) << IA64_PSR_UP_BIT) +#define IA64_PSR_AC (__IA64_UL(1) << IA64_PSR_AC_BIT) +#define IA64_PSR_MFL (__IA64_UL(1) << IA64_PSR_MFL_BIT) +#define IA64_PSR_MFH (__IA64_UL(1) << IA64_PSR_MFH_BIT) +#define IA64_PSR_IC (__IA64_UL(1) << IA64_PSR_IC_BIT) +#define IA64_PSR_I (__IA64_UL(1) << IA64_PSR_I_BIT) +#define IA64_PSR_PK (__IA64_UL(1) << IA64_PSR_PK_BIT) +#define IA64_PSR_DT (__IA64_UL(1) << IA64_PSR_DT_BIT) +#define IA64_PSR_DFL (__IA64_UL(1) << IA64_PSR_DFL_BIT) +#define IA64_PSR_DFH (__IA64_UL(1) << IA64_PSR_DFH_BIT) +#define IA64_PSR_SP (__IA64_UL(1) << IA64_PSR_SP_BIT) +#define IA64_PSR_PP (__IA64_UL(1) << IA64_PSR_PP_BIT) +#define IA64_PSR_DI (__IA64_UL(1) << IA64_PSR_DI_BIT) +#define IA64_PSR_SI (__IA64_UL(1) << IA64_PSR_SI_BIT) +#define IA64_PSR_DB (__IA64_UL(1) << IA64_PSR_DB_BIT) +#define IA64_PSR_LP (__IA64_UL(1) << IA64_PSR_LP_BIT) +#define IA64_PSR_TB (__IA64_UL(1) << IA64_PSR_TB_BIT) +#define IA64_PSR_RT (__IA64_UL(1) << IA64_PSR_RT_BIT) +/* The following are not affected by save_flags()/restore_flags(): */ +#define IA64_PSR_CPL (__IA64_UL(3) << IA64_PSR_CPL0_BIT) +#define IA64_PSR_IS (__IA64_UL(1) << IA64_PSR_IS_BIT) +#define IA64_PSR_MC (__IA64_UL(1) << IA64_PSR_MC_BIT) +#define IA64_PSR_IT (__IA64_UL(1) << IA64_PSR_IT_BIT) +#define IA64_PSR_ID (__IA64_UL(1) << IA64_PSR_ID_BIT) +#define IA64_PSR_DA (__IA64_UL(1) << IA64_PSR_DA_BIT) +#define IA64_PSR_DD (__IA64_UL(1) << IA64_PSR_DD_BIT) +#define IA64_PSR_SS (__IA64_UL(1) << IA64_PSR_SS_BIT) +#define IA64_PSR_RI (__IA64_UL(3) << IA64_PSR_RI_BIT) +#define IA64_PSR_ED (__IA64_UL(1) << IA64_PSR_ED_BIT) +#define IA64_PSR_BN (__IA64_UL(1) << IA64_PSR_BN_BIT) +#define IA64_PSR_IA (__IA64_UL(1) << IA64_PSR_IA_BIT) + +/* User mask bits: */ +#define IA64_PSR_UM (IA64_PSR_BE | IA64_PSR_UP | IA64_PSR_AC | IA64_PSR_MFL | IA64_PSR_MFH) + +/* Default Control Register */ +#define IA64_DCR_PP_BIT 0 /* privileged performance monitor default */ +#define IA64_DCR_BE_BIT 1 /* big-endian default */ +#define IA64_DCR_LC_BIT 2 /* ia32 lock-check enable */ +#define IA64_DCR_DM_BIT 8 /* defer TLB miss faults */ +#define IA64_DCR_DP_BIT 9 /* defer page-not-present faults */ +#define IA64_DCR_DK_BIT 10 /* defer key miss faults */ +#define IA64_DCR_DX_BIT 11 /* defer key permission faults */ +#define IA64_DCR_DR_BIT 12 /* defer access right faults */ +#define IA64_DCR_DA_BIT 13 /* defer access bit faults */ +#define IA64_DCR_DD_BIT 14 /* defer debug faults */ + +#define IA64_DCR_PP (__IA64_UL(1) << IA64_DCR_PP_BIT) +#define IA64_DCR_BE (__IA64_UL(1) << IA64_DCR_BE_BIT) +#define IA64_DCR_LC (__IA64_UL(1) << IA64_DCR_LC_BIT) +#define IA64_DCR_DM (__IA64_UL(1) << IA64_DCR_DM_BIT) +#define IA64_DCR_DP (__IA64_UL(1) << IA64_DCR_DP_BIT) +#define IA64_DCR_DK (__IA64_UL(1) << IA64_DCR_DK_BIT) +#define IA64_DCR_DX (__IA64_UL(1) << IA64_DCR_DX_BIT) +#define IA64_DCR_DR (__IA64_UL(1) << IA64_DCR_DR_BIT) +#define IA64_DCR_DA (__IA64_UL(1) << IA64_DCR_DA_BIT) +#define IA64_DCR_DD (__IA64_UL(1) << IA64_DCR_DD_BIT) + +/* Interrupt Status Register */ +#define IA64_ISR_X_BIT 32 /* execute access */ +#define IA64_ISR_W_BIT 33 /* write access */ +#define IA64_ISR_R_BIT 34 /* read access */ +#define IA64_ISR_NA_BIT 35 /* non-access */ +#define IA64_ISR_SP_BIT 36 /* speculative load exception */ +#define IA64_ISR_RS_BIT 37 /* mandatory register-stack exception */ +#define IA64_ISR_IR_BIT 38 /* invalid register frame exception */ +#define IA64_ISR_CODE_MASK 0xf + +#define IA64_ISR_X (__IA64_UL(1) << IA64_ISR_X_BIT) +#define IA64_ISR_W (__IA64_UL(1) << IA64_ISR_W_BIT) +#define IA64_ISR_R (__IA64_UL(1) << IA64_ISR_R_BIT) +#define IA64_ISR_NA (__IA64_UL(1) << IA64_ISR_NA_BIT) +#define IA64_ISR_SP (__IA64_UL(1) << IA64_ISR_SP_BIT) +#define IA64_ISR_RS (__IA64_UL(1) << IA64_ISR_RS_BIT) +#define IA64_ISR_IR (__IA64_UL(1) << IA64_ISR_IR_BIT) + +/* ISR code field for non-access instructions */ +#define IA64_ISR_CODE_TPA 0 +#define IA64_ISR_CODE_FC 1 +#define IA64_ISR_CODE_PROBE 2 +#define IA64_ISR_CODE_TAK 3 +#define IA64_ISR_CODE_LFETCH 4 +#define IA64_ISR_CODE_PROBEF 5 + +#endif /* _ASM_IA64_kREGS_H */ diff --git a/arch/ia64/include/asm/libata-portmap.h b/arch/ia64/include/asm/libata-portmap.h new file mode 100644 index 000000000000..757f84e5dc6e --- /dev/null +++ b/arch/ia64/include/asm/libata-portmap.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_IA64_LIBATA_PORTMAP_H +#define __ASM_IA64_LIBATA_PORTMAP_H + +#define ATA_PRIMARY_IRQ(dev) isa_irq_to_vector(14) + +#define ATA_SECONDARY_IRQ(dev) isa_irq_to_vector(15) + +#endif diff --git a/arch/ia64/include/asm/linkage.h b/arch/ia64/include/asm/linkage.h new file mode 100644 index 000000000000..5178af560925 --- /dev/null +++ b/arch/ia64/include/asm/linkage.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_LINKAGE_H +#define __ASM_LINKAGE_H + +#ifndef __ASSEMBLY__ + +#define asmlinkage CPP_ASMLINKAGE __attribute__((syscall_linkage)) + +#else + +#include + +#endif + +#define cond_syscall(x) asm(".weak\t" #x "#\n" #x "#\t=\tsys_ni_syscall#") +#define SYSCALL_ALIAS(alias, name) \ + asm ( #alias "# = " #name "#\n\t.globl " #alias "#") + +#endif diff --git a/arch/ia64/include/asm/local.h b/arch/ia64/include/asm/local.h new file mode 100644 index 000000000000..c11c530f74d0 --- /dev/null +++ b/arch/ia64/include/asm/local.h @@ -0,0 +1 @@ +#include diff --git a/arch/ia64/include/asm/mca.h b/arch/ia64/include/asm/mca.h new file mode 100644 index 000000000000..05805249296c --- /dev/null +++ b/arch/ia64/include/asm/mca.h @@ -0,0 +1,185 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * File: mca.h + * Purpose: Machine check handling specific defines + * + * Copyright (C) 1999, 2004 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander + * Copyright (C) Srinivasa Thirumalachar + * Copyright (C) Russ Anderson + */ + +#ifndef _ASM_IA64_MCA_H +#define _ASM_IA64_MCA_H + +#if !defined(__ASSEMBLY__) + +#include +#include +#include +#include + +#define IA64_MCA_RENDEZ_TIMEOUT (20 * 1000) /* value in milliseconds - 20 seconds */ + +typedef struct ia64_fptr { + unsigned long fp; + unsigned long gp; +} ia64_fptr_t; + +typedef union cmcv_reg_u { + u64 cmcv_regval; + struct { + u64 cmcr_vector : 8; + u64 cmcr_reserved1 : 4; + u64 cmcr_ignored1 : 1; + u64 cmcr_reserved2 : 3; + u64 cmcr_mask : 1; + u64 cmcr_ignored2 : 47; + } cmcv_reg_s; + +} cmcv_reg_t; + +#define cmcv_mask cmcv_reg_s.cmcr_mask +#define cmcv_vector cmcv_reg_s.cmcr_vector + +enum { + IA64_MCA_RENDEZ_CHECKIN_NOTDONE = 0x0, + IA64_MCA_RENDEZ_CHECKIN_DONE = 0x1, + IA64_MCA_RENDEZ_CHECKIN_INIT = 0x2, + IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA = 0x3, +}; + +/* Information maintained by the MC infrastructure */ +typedef struct ia64_mc_info_s { + u64 imi_mca_handler; + size_t imi_mca_handler_size; + u64 imi_monarch_init_handler; + size_t imi_monarch_init_handler_size; + u64 imi_slave_init_handler; + size_t imi_slave_init_handler_size; + u8 imi_rendez_checkin[NR_CPUS]; + +} ia64_mc_info_t; + +/* Handover state from SAL to OS and vice versa, for both MCA and INIT events. + * Besides the handover state, it also contains some saved registers from the + * time of the event. + * Note: mca_asm.S depends on the precise layout of this structure. + */ + +struct ia64_sal_os_state { + + /* SAL to OS */ + unsigned long os_gp; /* GP of the os registered with the SAL, physical */ + unsigned long pal_proc; /* PAL_PROC entry point, physical */ + unsigned long sal_proc; /* SAL_PROC entry point, physical */ + unsigned long rv_rc; /* MCA - Rendezvous state, INIT - reason code */ + unsigned long proc_state_param; /* from R18 */ + unsigned long monarch; /* 1 for a monarch event, 0 for a slave */ + + /* common */ + unsigned long sal_ra; /* Return address in SAL, physical */ + unsigned long sal_gp; /* GP of the SAL - physical */ + struct pal_min_state_area *pal_min_state; /* from R17. physical in asm, virtual in C */ + /* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK). + * Note: if the MCA/INIT recovery code wants to resume to a new context + * then it must change these values to reflect the new kernel stack. + */ + unsigned long prev_IA64_KR_CURRENT; /* previous value of IA64_KR(CURRENT) */ + unsigned long prev_IA64_KR_CURRENT_STACK; + struct task_struct *prev_task; /* previous task, NULL if it is not useful */ + /* Some interrupt registers are not saved in minstate, pt_regs or + * switch_stack. Because MCA/INIT can occur when interrupts are + * disabled, we need to save the additional interrupt registers over + * MCA/INIT and resume. + */ + unsigned long isr; + unsigned long ifa; + unsigned long itir; + unsigned long iipa; + unsigned long iim; + unsigned long iha; + + /* OS to SAL */ + unsigned long os_status; /* OS status to SAL, enum below */ + unsigned long context; /* 0 if return to same context + 1 if return to new context */ + + /* I-resources */ + unsigned long iip; + unsigned long ipsr; + unsigned long ifs; +}; + +enum { + IA64_MCA_CORRECTED = 0x0, /* Error has been corrected by OS_MCA */ + IA64_MCA_WARM_BOOT = -1, /* Warm boot of the system need from SAL */ + IA64_MCA_COLD_BOOT = -2, /* Cold boot of the system need from SAL */ + IA64_MCA_HALT = -3 /* System to be halted by SAL */ +}; + +enum { + IA64_INIT_RESUME = 0x0, /* Resume after return from INIT */ + IA64_INIT_WARM_BOOT = -1, /* Warm boot of the system need from SAL */ +}; + +enum { + IA64_MCA_SAME_CONTEXT = 0x0, /* SAL to return to same context */ + IA64_MCA_NEW_CONTEXT = -1 /* SAL to return to new context */ +}; + +/* Per-CPU MCA state that is too big for normal per-CPU variables. */ + +struct ia64_mca_cpu { + u64 mca_stack[KERNEL_STACK_SIZE/8]; + u64 init_stack[KERNEL_STACK_SIZE/8]; +}; + +/* Array of physical addresses of each CPU's MCA area. */ +extern unsigned long __per_cpu_mca[NR_CPUS]; + +extern int cpe_vector; +extern int ia64_cpe_irq; +extern void ia64_mca_init(void); +extern void ia64_mca_irq_init(void); +extern void ia64_mca_cpu_init(void *); +extern void ia64_os_mca_dispatch(void); +extern void ia64_os_mca_dispatch_end(void); +extern void ia64_mca_ucmc_handler(struct pt_regs *, struct ia64_sal_os_state *); +extern void ia64_init_handler(struct pt_regs *, + struct switch_stack *, + struct ia64_sal_os_state *); +extern void ia64_os_init_on_kdump(void); +extern void ia64_monarch_init_handler(void); +extern void ia64_slave_init_handler(void); +extern void ia64_mca_cmc_vector_setup(void); +extern int ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)); +extern void ia64_unreg_MCA_extension(void); +extern unsigned long ia64_get_rnat(unsigned long *); +extern void ia64_set_psr_mc(void); +extern void ia64_mca_printk(const char * fmt, ...) + __attribute__ ((format (printf, 1, 2))); + +struct ia64_mca_notify_die { + struct ia64_sal_os_state *sos; + int *monarch_cpu; + int *data; +}; + +DECLARE_PER_CPU(u64, ia64_mca_pal_base); + +#else /* __ASSEMBLY__ */ + +#define IA64_MCA_CORRECTED 0x0 /* Error has been corrected by OS_MCA */ +#define IA64_MCA_WARM_BOOT -1 /* Warm boot of the system need from SAL */ +#define IA64_MCA_COLD_BOOT -2 /* Cold boot of the system need from SAL */ +#define IA64_MCA_HALT -3 /* System to be halted by SAL */ + +#define IA64_INIT_RESUME 0x0 /* Resume after return from INIT */ +#define IA64_INIT_WARM_BOOT -1 /* Warm boot of the system need from SAL */ + +#define IA64_MCA_SAME_CONTEXT 0x0 /* SAL to return to same context */ +#define IA64_MCA_NEW_CONTEXT -1 /* SAL to return to new context */ + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_MCA_H */ diff --git a/arch/ia64/include/asm/mca_asm.h b/arch/ia64/include/asm/mca_asm.h new file mode 100644 index 000000000000..e3ab1f41f1c3 --- /dev/null +++ b/arch/ia64/include/asm/mca_asm.h @@ -0,0 +1,245 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * File: mca_asm.h + * Purpose: Machine check handling specific defines + * + * Copyright (C) 1999 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander + * Copyright (C) Srinivasa Thirumalachar + * Copyright (C) 2000 Hewlett-Packard Co. + * Copyright (C) 2000 David Mosberger-Tang + * Copyright (C) 2002 Intel Corp. + * Copyright (C) 2002 Jenna Hall + * Copyright (C) 2005 Silicon Graphics, Inc + * Copyright (C) 2005 Keith Owens + */ +#ifndef _ASM_IA64_MCA_ASM_H +#define _ASM_IA64_MCA_ASM_H + +#include + +#define PSR_IC 13 +#define PSR_I 14 +#define PSR_DT 17 +#define PSR_RT 27 +#define PSR_MC 35 +#define PSR_IT 36 +#define PSR_BN 44 + +/* + * This macro converts a instruction virtual address to a physical address + * Right now for simulation purposes the virtual addresses are + * direct mapped to physical addresses. + * 1. Lop off bits 61 thru 63 in the virtual address + */ +#define INST_VA_TO_PA(addr) \ + dep addr = 0, addr, 61, 3 +/* + * This macro converts a data virtual address to a physical address + * Right now for simulation purposes the virtual addresses are + * direct mapped to physical addresses. + * 1. Lop off bits 61 thru 63 in the virtual address + */ +#define DATA_VA_TO_PA(addr) \ + tpa addr = addr +/* + * This macro converts a data physical address to a virtual address + * Right now for simulation purposes the virtual addresses are + * direct mapped to physical addresses. + * 1. Put 0x7 in bits 61 thru 63. + */ +#define DATA_PA_TO_VA(addr,temp) \ + mov temp = 0x7 ;; \ + dep addr = temp, addr, 61, 3 + +#define GET_THIS_PADDR(reg, var) \ + mov reg = IA64_KR(PER_CPU_DATA);; \ + addl reg = THIS_CPU(var), reg + +/* + * This macro jumps to the instruction at the given virtual address + * and starts execution in physical mode with all the address + * translations turned off. + * 1. Save the current psr + * 2. Make sure that all the upper 32 bits are off + * + * 3. Clear the interrupt enable and interrupt state collection bits + * in the psr before updating the ipsr and iip. + * + * 4. Turn off the instruction, data and rse translation bits of the psr + * and store the new value into ipsr + * Also make sure that the interrupts are disabled. + * Ensure that we are in little endian mode. + * [psr.{rt, it, dt, i, be} = 0] + * + * 5. Get the physical address corresponding to the virtual address + * of the next instruction bundle and put it in iip. + * (Using magic numbers 24 and 40 in the deposint instruction since + * the IA64_SDK code directly maps to lower 24bits as physical address + * from a virtual address). + * + * 6. Do an rfi to move the values from ipsr to psr and iip to ip. + */ +#define PHYSICAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ + mov old_psr = psr; \ + ;; \ + dep old_psr = 0, old_psr, 32, 32; \ + \ + mov ar.rsc = 0 ; \ + ;; \ + srlz.d; \ + mov temp2 = ar.bspstore; \ + ;; \ + DATA_VA_TO_PA(temp2); \ + ;; \ + mov temp1 = ar.rnat; \ + ;; \ + mov ar.bspstore = temp2; \ + ;; \ + mov ar.rnat = temp1; \ + mov temp1 = psr; \ + mov temp2 = psr; \ + ;; \ + \ + dep temp2 = 0, temp2, PSR_IC, 2; \ + ;; \ + mov psr.l = temp2; \ + ;; \ + srlz.d; \ + dep temp1 = 0, temp1, 32, 32; \ + ;; \ + dep temp1 = 0, temp1, PSR_IT, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_DT, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_RT, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_I, 1; \ + ;; \ + dep temp1 = 0, temp1, PSR_IC, 1; \ + ;; \ + dep temp1 = -1, temp1, PSR_MC, 1; \ + ;; \ + mov cr.ipsr = temp1; \ + ;; \ + LOAD_PHYSICAL(p0, temp2, start_addr); \ + ;; \ + mov cr.iip = temp2; \ + mov cr.ifs = r0; \ + DATA_VA_TO_PA(sp); \ + DATA_VA_TO_PA(gp); \ + ;; \ + srlz.i; \ + ;; \ + nop 1; \ + nop 2; \ + nop 1; \ + nop 2; \ + rfi; \ + ;; + +/* + * This macro jumps to the instruction at the given virtual address + * and starts execution in virtual mode with all the address + * translations turned on. + * 1. Get the old saved psr + * + * 2. Clear the interrupt state collection bit in the current psr. + * + * 3. Set the instruction translation bit back in the old psr + * Note we have to do this since we are right now saving only the + * lower 32-bits of old psr.(Also the old psr has the data and + * rse translation bits on) + * + * 4. Set ipsr to this old_psr with "it" bit set and "bn" = 1. + * + * 5. Reset the current thread pointer (r13). + * + * 6. Set iip to the virtual address of the next instruction bundle. + * + * 7. Do an rfi to move ipsr to psr and iip to ip. + */ + +#define VIRTUAL_MODE_ENTER(temp1, temp2, start_addr, old_psr) \ + mov temp2 = psr; \ + ;; \ + mov old_psr = temp2; \ + ;; \ + dep temp2 = 0, temp2, PSR_IC, 2; \ + ;; \ + mov psr.l = temp2; \ + mov ar.rsc = 0; \ + ;; \ + srlz.d; \ + mov r13 = ar.k6; \ + mov temp2 = ar.bspstore; \ + ;; \ + DATA_PA_TO_VA(temp2,temp1); \ + ;; \ + mov temp1 = ar.rnat; \ + ;; \ + mov ar.bspstore = temp2; \ + ;; \ + mov ar.rnat = temp1; \ + ;; \ + mov temp1 = old_psr; \ + ;; \ + mov temp2 = 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_IC, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_IT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_DT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_RT, 1; \ + ;; \ + dep temp1 = temp2, temp1, PSR_BN, 1; \ + ;; \ + \ + mov cr.ipsr = temp1; \ + movl temp2 = start_addr; \ + ;; \ + mov cr.iip = temp2; \ + movl gp = __gp \ + ;; \ + DATA_PA_TO_VA(sp, temp1); \ + srlz.i; \ + ;; \ + nop 1; \ + nop 2; \ + nop 1; \ + rfi \ + ;; + +/* + * The MCA and INIT stacks in struct ia64_mca_cpu look like normal kernel + * stacks, except that the SAL/OS state and a switch_stack are stored near the + * top of the MCA/INIT stack. To support concurrent entry to MCA or INIT, as + * well as MCA over INIT, each event needs its own SAL/OS state. All entries + * are 16 byte aligned. + * + * +---------------------------+ + * | pt_regs | + * +---------------------------+ + * | switch_stack | + * +---------------------------+ + * | SAL/OS state | + * +---------------------------+ + * | 16 byte scratch area | + * +---------------------------+ <-------- SP at start of C MCA handler + * | ..... | + * +---------------------------+ + * | RBS for MCA/INIT handler | + * +---------------------------+ + * | struct task for MCA/INIT | + * +---------------------------+ <-------- Bottom of MCA/INIT stack + */ + +#define ALIGN16(x) ((x)&~15) +#define MCA_PT_REGS_OFFSET ALIGN16(KERNEL_STACK_SIZE-IA64_PT_REGS_SIZE) +#define MCA_SWITCH_STACK_OFFSET ALIGN16(MCA_PT_REGS_OFFSET-IA64_SWITCH_STACK_SIZE) +#define MCA_SOS_OFFSET ALIGN16(MCA_SWITCH_STACK_OFFSET-IA64_SAL_OS_STATE_SIZE) +#define MCA_SP_OFFSET ALIGN16(MCA_SOS_OFFSET-16) + +#endif /* _ASM_IA64_MCA_ASM_H */ diff --git a/arch/ia64/include/asm/meminit.h b/arch/ia64/include/asm/meminit.h new file mode 100644 index 000000000000..f1d5bf2ba847 --- /dev/null +++ b/arch/ia64/include/asm/meminit.h @@ -0,0 +1,59 @@ +#ifndef meminit_h +#define meminit_h + +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + + +/* + * Entries defined so far: + * - boot param structure itself + * - memory map + * - initrd (optional) + * - command line string + * - kernel code & data + * - crash dumping code reserved region + * - Kernel memory map built from EFI memory map + * - ELF core header + * + * More could be added if necessary + */ +#define IA64_MAX_RSVD_REGIONS 9 + +struct rsvd_region { + u64 start; /* virtual address of beginning of element */ + u64 end; /* virtual address of end of element + 1 */ +}; + +extern struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1]; + +extern void find_memory (void); +extern void reserve_memory (void); +extern void find_initrd (void); +extern int filter_rsvd_memory (u64 start, u64 end, void *arg); +extern int filter_memory (u64 start, u64 end, void *arg); +extern unsigned long efi_memmap_init(u64 *s, u64 *e); +extern int find_max_min_low_pfn (u64, u64, void *); + +extern unsigned long vmcore_find_descriptor_size(unsigned long address); + +/* + * For rounding an address to the next IA64_GRANULE_SIZE or order + */ +#define GRANULEROUNDDOWN(n) ((n) & ~(IA64_GRANULE_SIZE-1)) +#define GRANULEROUNDUP(n) (((n)+IA64_GRANULE_SIZE-1) & ~(IA64_GRANULE_SIZE-1)) + +#ifdef CONFIG_NUMA + extern void call_pernode_memory (unsigned long start, unsigned long len, void *func); +#else +# define call_pernode_memory(start, len, func) (*func)(start, len, 0) +#endif + +#define IGNORE_PFN0 1 /* XXX fix me: ignore pfn 0 until TLB miss handler is updated... */ + +extern int register_active_ranges(u64 start, u64 len, int nid); + +#endif /* meminit_h */ diff --git a/arch/ia64/include/asm/mman.h b/arch/ia64/include/asm/mman.h new file mode 100644 index 000000000000..15cf100add0e --- /dev/null +++ b/arch/ia64/include/asm/mman.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Based on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _ASM_IA64_MMAN_H +#define _ASM_IA64_MMAN_H + +#include + +#ifndef __ASSEMBLY__ +#define arch_mmap_check ia64_mmap_check +int ia64_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags); +#endif +#endif /* _ASM_IA64_MMAN_H */ diff --git a/arch/ia64/include/asm/mmiowb.h b/arch/ia64/include/asm/mmiowb.h new file mode 100644 index 000000000000..d67aab4ea3b4 --- /dev/null +++ b/arch/ia64/include/asm/mmiowb.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _ASM_IA64_MMIOWB_H +#define _ASM_IA64_MMIOWB_H + +/** + * mmiowb - I/O write barrier + * + * Ensure ordering of I/O space writes. This will make sure that writes + * following the barrier will arrive after all previous writes. For most + * ia64 platforms, this is a simple 'mf.a' instruction. + */ +#define mmiowb() ia64_mfa() + +#include + +#endif /* _ASM_IA64_MMIOWB_H */ diff --git a/arch/ia64/include/asm/mmu.h b/arch/ia64/include/asm/mmu.h new file mode 100644 index 000000000000..f75f44f531c2 --- /dev/null +++ b/arch/ia64/include/asm/mmu.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __MMU_H +#define __MMU_H + +/* + * Type for a context number. We declare it volatile to ensure proper + * ordering when it's accessed outside of spinlock'd critical sections + * (e.g., as done in activate_mm() and init_new_context()). + */ +typedef volatile unsigned long mm_context_t; + +typedef unsigned long nv_mm_context_t; + +#endif diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h new file mode 100644 index 000000000000..06257e355d00 --- /dev/null +++ b/arch/ia64/include/asm/mmu_context.h @@ -0,0 +1,194 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_MMU_CONTEXT_H +#define _ASM_IA64_MMU_CONTEXT_H + +/* + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +/* + * Routines to manage the allocation of task context numbers. Task context + * numbers are used to reduce or eliminate the need to perform TLB flushes + * due to context switches. Context numbers are implemented using ia-64 + * region ids. Since the IA-64 TLB does not consider the region number when + * performing a TLB lookup, we need to assign a unique region id to each + * region in a process. We use the least significant three bits in aregion + * id for this purpose. + */ + +#define IA64_REGION_ID_KERNEL 0 /* the kernel's region id (tlb.c depends on this being 0) */ + +#define ia64_rid(ctx,addr) (((ctx) << 3) | (addr >> 61)) + +# include +# ifndef __ASSEMBLY__ + +#include +#include +#include +#include +#include + +#include +#include + +struct ia64_ctx { + spinlock_t lock; + unsigned int next; /* next context number to use */ + unsigned int limit; /* available free range */ + unsigned int max_ctx; /* max. context value supported by all CPUs */ + /* call wrap_mmu_context when next >= max */ + unsigned long *bitmap; /* bitmap size is max_ctx+1 */ + unsigned long *flushmap;/* pending rid to be flushed */ +}; + +extern struct ia64_ctx ia64_ctx; +DECLARE_PER_CPU(u8, ia64_need_tlb_flush); + +extern void mmu_context_init (void); +extern void wrap_mmu_context (struct mm_struct *mm); + +/* + * When the context counter wraps around all TLBs need to be flushed because + * an old context number might have been reused. This is signalled by the + * ia64_need_tlb_flush per-CPU variable, which is checked in the routine + * below. Called by activate_mm(). + */ +static inline void +delayed_tlb_flush (void) +{ + extern void local_flush_tlb_all (void); + unsigned long flags; + + if (unlikely(__ia64_per_cpu_var(ia64_need_tlb_flush))) { + spin_lock_irqsave(&ia64_ctx.lock, flags); + if (__ia64_per_cpu_var(ia64_need_tlb_flush)) { + local_flush_tlb_all(); + __ia64_per_cpu_var(ia64_need_tlb_flush) = 0; + } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); + } +} + +static inline nv_mm_context_t +get_mmu_context (struct mm_struct *mm) +{ + unsigned long flags; + nv_mm_context_t context = mm->context; + + if (likely(context)) + goto out; + + spin_lock_irqsave(&ia64_ctx.lock, flags); + /* re-check, now that we've got the lock: */ + context = mm->context; + if (context == 0) { + cpumask_clear(mm_cpumask(mm)); + if (ia64_ctx.next >= ia64_ctx.limit) { + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + if (ia64_ctx.next >= ia64_ctx.max_ctx) + wrap_mmu_context(mm); + } + mm->context = context = ia64_ctx.next++; + __set_bit(context, ia64_ctx.bitmap); + } + spin_unlock_irqrestore(&ia64_ctx.lock, flags); +out: + /* + * Ensure we're not starting to use "context" before any old + * uses of it are gone from our TLB. + */ + delayed_tlb_flush(); + + return context; +} + +/* + * Initialize context number to some sane value. MM is guaranteed to be a + * brand-new address-space, so no TLB flushing is needed, ever. + */ +#define init_new_context init_new_context +static inline int +init_new_context (struct task_struct *p, struct mm_struct *mm) +{ + mm->context = 0; + return 0; +} + +static inline void +reload_context (nv_mm_context_t context) +{ + unsigned long rid; + unsigned long rid_incr = 0; + unsigned long rr0, rr1, rr2, rr3, rr4; + +#ifdef CONFIG_HUGETLB_PAGE + unsigned long old_rr4; + old_rr4 = ia64_get_rr(RGN_BASE(RGN_HPAGE)); +#endif + rid = context << 3; /* make space for encoding the region number */ + rid_incr = 1 << 8; + + /* encode the region id, preferred page size, and VHPT enable bit: */ + rr0 = (rid << 8) | (PAGE_SHIFT << 2) | 1; + rr1 = rr0 + 1*rid_incr; + rr2 = rr0 + 2*rid_incr; + rr3 = rr0 + 3*rid_incr; + rr4 = rr0 + 4*rid_incr; +#ifdef CONFIG_HUGETLB_PAGE + rr4 = (rr4 & (~(0xfcUL))) | (old_rr4 & 0xfc); + +# if RGN_HPAGE != 4 +# error "reload_context assumes RGN_HPAGE is 4" +# endif +#endif + + ia64_set_rr0_to_rr4(rr0, rr1, rr2, rr3, rr4); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +/* + * Must be called with preemption off + */ +static inline void +activate_context (struct mm_struct *mm) +{ + nv_mm_context_t context; + + do { + context = get_mmu_context(mm); + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(mm))) + cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm)); + reload_context(context); + /* + * in the unlikely event of a TLB-flush by another thread, + * redo the load. + */ + } while (unlikely(context != mm->context)); +} + +/* + * Switch from address space PREV to address space NEXT. + */ +#define activate_mm activate_mm +static inline void +activate_mm (struct mm_struct *prev, struct mm_struct *next) +{ + /* + * We may get interrupts here, but that's OK because interrupt + * handlers cannot touch user-space. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(next->pgd)); + activate_context(next); +} + +#define switch_mm(prev_mm,next_mm,next_task) activate_mm(prev_mm, next_mm) + +#include + +# endif /* ! __ASSEMBLY__ */ +#endif /* _ASM_IA64_MMU_CONTEXT_H */ diff --git a/arch/ia64/include/asm/mmzone.h b/arch/ia64/include/asm/mmzone.h new file mode 100644 index 000000000000..767201f66c93 --- /dev/null +++ b/arch/ia64/include/asm/mmzone.h @@ -0,0 +1,35 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000,2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Erich Focht + * Copyright (c) 2002 Kimio Suganuma + */ +#ifndef _ASM_IA64_MMZONE_H +#define _ASM_IA64_MMZONE_H + +#include +#include +#include + +#ifdef CONFIG_NUMA + +static inline int pfn_to_nid(unsigned long pfn) +{ + extern int paddr_to_nid(unsigned long); + int nid = paddr_to_nid(pfn << PAGE_SHIFT); + if (nid < 0) + return 0; + else + return nid; +} + +#define MAX_PHYSNODE_ID 2048 +#endif /* CONFIG_NUMA */ + +#define NR_NODE_MEMBLKS (MAX_NUMNODES * 4) + +#endif /* _ASM_IA64_MMZONE_H */ diff --git a/arch/ia64/include/asm/module.h b/arch/ia64/include/asm/module.h new file mode 100644 index 000000000000..7271b9c5fc76 --- /dev/null +++ b/arch/ia64/include/asm/module.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_MODULE_H +#define _ASM_IA64_MODULE_H + +#include + +/* + * IA-64-specific support for kernel module loader. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +struct elf64_shdr; /* forward declration */ + +struct mod_arch_specific { + /* Used only at module load time. */ + struct elf64_shdr *core_plt; /* core PLT section */ + struct elf64_shdr *init_plt; /* init PLT section */ + struct elf64_shdr *got; /* global offset table */ + struct elf64_shdr *opd; /* official procedure descriptors */ + struct elf64_shdr *unwind; /* unwind-table section */ + unsigned long gp; /* global-pointer for module */ + unsigned int next_got_entry; /* index of next available got entry */ + + /* Used at module run and cleanup time. */ + void *core_unw_table; /* core unwind-table cookie returned by unwinder */ + void *init_unw_table; /* init unwind-table cookie returned by unwinder */ + void *opd_addr; /* symbolize uses .opd to get to actual function */ + unsigned long opd_size; +}; + +#define ARCH_SHF_SMALL SHF_IA_64_SHORT + +#endif /* _ASM_IA64_MODULE_H */ diff --git a/arch/ia64/include/asm/module.lds.h b/arch/ia64/include/asm/module.lds.h new file mode 100644 index 000000000000..eff68f362793 --- /dev/null +++ b/arch/ia64/include/asm/module.lds.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +SECTIONS { + /* Group unwind sections into a single section: */ + .IA_64.unwind_info : { *(.IA_64.unwind_info*) } + .IA_64.unwind : { *(.IA_64.unwind*) } + /* + * Create place-holder sections to hold the PLTs, GOT, and + * official procedure-descriptors (.opd). + */ + .core.plt : { BYTE(0) } + .init.plt : { BYTE(0) } + .got : { BYTE(0) } + .opd : { BYTE(0) } +} diff --git a/arch/ia64/include/asm/msidef.h b/arch/ia64/include/asm/msidef.h new file mode 100644 index 000000000000..18d0e4226748 --- /dev/null +++ b/arch/ia64/include/asm/msidef.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _IA64_MSI_DEF_H +#define _IA64_MSI_DEF_H + +/* + * Shifts for APIC-based data + */ + +#define MSI_DATA_VECTOR_SHIFT 0 +#define MSI_DATA_VECTOR(v) (((u8)v) << MSI_DATA_VECTOR_SHIFT) +#define MSI_DATA_VECTOR_MASK 0xffffff00 + +#define MSI_DATA_DELIVERY_MODE_SHIFT 8 +#define MSI_DATA_DELIVERY_FIXED (0 << MSI_DATA_DELIVERY_MODE_SHIFT) +#define MSI_DATA_DELIVERY_LOWPRI (1 << MSI_DATA_DELIVERY_MODE_SHIFT) + +#define MSI_DATA_LEVEL_SHIFT 14 +#define MSI_DATA_LEVEL_DEASSERT (0 << MSI_DATA_LEVEL_SHIFT) +#define MSI_DATA_LEVEL_ASSERT (1 << MSI_DATA_LEVEL_SHIFT) + +#define MSI_DATA_TRIGGER_SHIFT 15 +#define MSI_DATA_TRIGGER_EDGE (0 << MSI_DATA_TRIGGER_SHIFT) +#define MSI_DATA_TRIGGER_LEVEL (1 << MSI_DATA_TRIGGER_SHIFT) + +/* + * Shift/mask fields for APIC-based bus address + */ + +#define MSI_ADDR_DEST_ID_SHIFT 4 +#define MSI_ADDR_HEADER 0xfee00000 + +#define MSI_ADDR_DEST_ID_MASK 0xfff0000f +#define MSI_ADDR_DEST_ID_CPU(cpu) ((cpu) << MSI_ADDR_DEST_ID_SHIFT) + +#define MSI_ADDR_DEST_MODE_SHIFT 2 +#define MSI_ADDR_DEST_MODE_PHYS (0 << MSI_ADDR_DEST_MODE_SHIFT) +#define MSI_ADDR_DEST_MODE_LOGIC (1 << MSI_ADDR_DEST_MODE_SHIFT) + +#define MSI_ADDR_REDIRECTION_SHIFT 3 +#define MSI_ADDR_REDIRECTION_CPU (0 << MSI_ADDR_REDIRECTION_SHIFT) +#define MSI_ADDR_REDIRECTION_LOWPRI (1 << MSI_ADDR_REDIRECTION_SHIFT) + +#endif/* _IA64_MSI_DEF_H */ diff --git a/arch/ia64/include/asm/native/inst.h b/arch/ia64/include/asm/native/inst.h new file mode 100644 index 000000000000..e08662396029 --- /dev/null +++ b/arch/ia64/include/asm/native/inst.h @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/****************************************************************************** + * arch/ia64/include/asm/native/inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + */ + +#define DO_SAVE_MIN IA64_NATIVE_DO_SAVE_MIN + +#define MOV_FROM_IFA(reg) \ + mov reg = cr.ifa + +#define MOV_FROM_ITIR(reg) \ + mov reg = cr.itir + +#define MOV_FROM_ISR(reg) \ + mov reg = cr.isr + +#define MOV_FROM_IHA(reg) \ + mov reg = cr.iha + +#define MOV_FROM_IPSR(pred, reg) \ +(pred) mov reg = cr.ipsr + +#define MOV_FROM_IIM(reg) \ + mov reg = cr.iim + +#define MOV_FROM_IIP(reg) \ + mov reg = cr.iip + +#define MOV_FROM_IVR(reg, clob) \ + mov reg = cr.ivr + +#define MOV_FROM_PSR(pred, reg, clob) \ +(pred) mov reg = psr + +#define MOV_FROM_ITC(pred, pred_clob, reg, clob) \ +(pred) mov reg = ar.itc + +#define MOV_TO_IFA(reg, clob) \ + mov cr.ifa = reg + +#define MOV_TO_ITIR(pred, reg, clob) \ +(pred) mov cr.itir = reg + +#define MOV_TO_IHA(pred, reg, clob) \ +(pred) mov cr.iha = reg + +#define MOV_TO_IPSR(pred, reg, clob) \ +(pred) mov cr.ipsr = reg + +#define MOV_TO_IFS(pred, reg, clob) \ +(pred) mov cr.ifs = reg + +#define MOV_TO_IIP(reg, clob) \ + mov cr.iip = reg + +#define MOV_TO_KR(kr, reg, clob0, clob1) \ + mov IA64_KR(kr) = reg + +#define ITC_I(pred, reg, clob) \ +(pred) itc.i reg + +#define ITC_D(pred, reg, clob) \ +(pred) itc.d reg + +#define ITC_I_AND_D(pred_i, pred_d, reg, clob) \ +(pred_i) itc.i reg; \ +(pred_d) itc.d reg + +#define THASH(pred, reg0, reg1, clob) \ +(pred) thash reg0 = reg1 + +#define SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(clob0, clob1) \ + ssm psr.ic | PSR_DEFAULT_BITS \ + ;; \ + srlz.i /* guarantee that interruption collectin is on */ \ + ;; + +#define SSM_PSR_IC_AND_SRLZ_D(clob0, clob1) \ + ssm psr.ic \ + ;; \ + srlz.d + +#define RSM_PSR_IC(clob) \ + rsm psr.ic + +#define SSM_PSR_I(pred, pred_clob, clob) \ +(pred) ssm psr.i + +#define RSM_PSR_I(pred, clob0, clob1) \ +(pred) rsm psr.i + +#define RSM_PSR_I_IC(clob0, clob1, clob2) \ + rsm psr.i | psr.ic + +#define RSM_PSR_DT \ + rsm psr.dt + +#define RSM_PSR_BE_I(clob0, clob1) \ + rsm psr.be | psr.i + +#define SSM_PSR_DT_AND_SRLZ_I \ + ssm psr.dt \ + ;; \ + srlz.i + +#define BSW_0(clob0, clob1, clob2) \ + bsw.0 + +#define BSW_1(clob0, clob1) \ + bsw.1 + +#define COVER \ + cover + +#define RFI \ + rfi diff --git a/arch/ia64/include/asm/native/irq.h b/arch/ia64/include/asm/native/irq.h new file mode 100644 index 000000000000..aa74915f8aa2 --- /dev/null +++ b/arch/ia64/include/asm/native/irq.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/****************************************************************************** + * arch/ia64/include/asm/native/irq.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + */ + +#ifndef _ASM_IA64_NATIVE_IRQ_H +#define _ASM_IA64_NATIVE_IRQ_H + +#define NR_VECTORS 256 + +#if (NR_VECTORS + 32 * NR_CPUS) < 1024 +#define IA64_NATIVE_NR_IRQS (NR_VECTORS + 32 * NR_CPUS) +#else +#define IA64_NATIVE_NR_IRQS 1024 +#endif + +#endif /* _ASM_IA64_NATIVE_IRQ_H */ diff --git a/arch/ia64/include/asm/native/patchlist.h b/arch/ia64/include/asm/native/patchlist.h new file mode 100644 index 000000000000..f13e7675758c --- /dev/null +++ b/arch/ia64/include/asm/native/patchlist.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/****************************************************************************** + * arch/ia64/include/asm/native/inst.h + * + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + */ + +#define __paravirt_start_gate_fsyscall_patchlist \ + __ia64_native_start_gate_fsyscall_patchlist +#define __paravirt_end_gate_fsyscall_patchlist \ + __ia64_native_end_gate_fsyscall_patchlist +#define __paravirt_start_gate_brl_fsys_bubble_down_patchlist \ + __ia64_native_start_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_end_gate_brl_fsys_bubble_down_patchlist \ + __ia64_native_end_gate_brl_fsys_bubble_down_patchlist +#define __paravirt_start_gate_vtop_patchlist \ + __ia64_native_start_gate_vtop_patchlist +#define __paravirt_end_gate_vtop_patchlist \ + __ia64_native_end_gate_vtop_patchlist +#define __paravirt_start_gate_mckinley_e9_patchlist \ + __ia64_native_start_gate_mckinley_e9_patchlist +#define __paravirt_end_gate_mckinley_e9_patchlist \ + __ia64_native_end_gate_mckinley_e9_patchlist diff --git a/arch/ia64/include/asm/nodedata.h b/arch/ia64/include/asm/nodedata.h new file mode 100644 index 000000000000..2fb337b0e9b7 --- /dev/null +++ b/arch/ia64/include/asm/nodedata.h @@ -0,0 +1,63 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Erich Focht + * Copyright (c) 2002 Kimio Suganuma + */ +#ifndef _ASM_IA64_NODEDATA_H +#define _ASM_IA64_NODEDATA_H + +#include + +#include +#include + +#ifdef CONFIG_NUMA + +/* + * Node Data. One of these structures is located on each node of a NUMA system. + */ + +struct pglist_data; +struct ia64_node_data { + short active_cpu_count; + short node; + struct pglist_data *pg_data_ptrs[MAX_NUMNODES]; +}; + + +/* + * Return a pointer to the node_data structure for the executing cpu. + */ +#define local_node_data (local_cpu_data->node_data) + +/* + * Given a node id, return a pointer to the pg_data_t for the node. + * + * NODE_DATA - should be used in all code not related to system + * initialization. It uses pernode data structures to minimize + * offnode memory references. However, these structure are not + * present during boot. This macro can be used once cpu_init + * completes. + */ +#define NODE_DATA(nid) (local_node_data->pg_data_ptrs[nid]) + +/* + * LOCAL_DATA_ADDR - This is to calculate the address of other node's + * "local_node_data" at hot-plug phase. The local_node_data + * is pointed by per_cpu_page. Kernel usually use it for + * just executing cpu. However, when new node is hot-added, + * the addresses of local data for other nodes are necessary + * to update all of them. + */ +#define LOCAL_DATA_ADDR(pgdat) \ + ((struct ia64_node_data *)((u64)(pgdat) + \ + L1_CACHE_ALIGN(sizeof(struct pglist_data)))) + +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_IA64_NODEDATA_H */ diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h new file mode 100644 index 000000000000..c5c253cb9bd6 --- /dev/null +++ b/arch/ia64/include/asm/numa.h @@ -0,0 +1,83 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file contains NUMA specific prototypes and definitions. + * + * 2002/08/05 Erich Focht + * + */ +#ifndef _ASM_IA64_NUMA_H +#define _ASM_IA64_NUMA_H + + +#ifdef CONFIG_NUMA + +#include +#include +#include +#include +#include + +#include + +extern u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +extern cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +extern pg_data_t *pgdat_list[MAX_NUMNODES]; + +/* Stuff below this line could be architecture independent */ + +extern int num_node_memblks; /* total number of memory chunks */ + +/* + * List of node memory chunks. Filled when parsing SRAT table to + * obtain information about memory nodes. +*/ + +struct node_memblk_s { + unsigned long start_paddr; + unsigned long size; + int nid; /* which logical node contains this chunk? */ + int bank; /* which mem bank on this node */ +}; + +struct node_cpuid_s { + u16 phys_id; /* id << 8 | eid */ + int nid; /* logical node containing this CPU */ +}; + +extern struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; +extern struct node_cpuid_s node_cpuid[NR_CPUS]; + +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + * + * This is a matrix with "distances" between nodes, they should be + * proportional to the memory access latency ratios. + */ + +extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; +#define slit_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) +extern int __node_distance(int from, int to); +#define node_distance(from,to) __node_distance(from, to) + +extern int paddr_to_nid(unsigned long paddr); + +#define local_nodeid (cpu_to_node_map[smp_processor_id()]) + +#define numa_off 0 + +extern void map_cpu_to_node(int cpu, int nid); +extern void unmap_cpu_from_node(int cpu, int nid); +extern void numa_clear_node(int cpu); + +#else /* !CONFIG_NUMA */ +#define map_cpu_to_node(cpu, nid) do{}while(0) +#define unmap_cpu_from_node(cpu, nid) do{}while(0) +#define paddr_to_nid(addr) 0 +#define numa_clear_node(cpu) do { } while (0) +#endif /* CONFIG_NUMA */ + +#endif /* _ASM_IA64_NUMA_H */ diff --git a/arch/ia64/include/asm/page.h b/arch/ia64/include/asm/page.h new file mode 100644 index 000000000000..310b09c3342d --- /dev/null +++ b/arch/ia64/include/asm/page.h @@ -0,0 +1,208 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PAGE_H +#define _ASM_IA64_PAGE_H +/* + * Pagetable related stuff. + * + * Copyright (C) 1998, 1999, 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + +/* + * The top three bits of an IA64 address are its Region Number. + * Different regions are assigned to different purposes. + */ +#define RGN_SHIFT (61) +#define RGN_BASE(r) (__IA64_UL_CONST(r)<> PAGE_SHIFT) + +#include + +#define page_to_phys(page) (page_to_pfn(page) << PAGE_SHIFT) +#define virt_to_page(kaddr) pfn_to_page(__pa(kaddr) >> PAGE_SHIFT) +#define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) + +typedef union ia64_va { + struct { + unsigned long off : 61; /* intra-region offset */ + unsigned long reg : 3; /* region number */ + } f; + unsigned long l; + void *p; +} ia64_va; + +/* + * Note: These macros depend on the fact that PAGE_OFFSET has all + * region bits set to 1 and all other bits set to zero. They are + * expressed in this way to ensure they result in a single "dep" + * instruction. + */ +#define __pa(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = 0; _v.l;}) +#define __va(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;}) + +#define REGION_NUMBER(x) ({ia64_va _v; _v.l = (long) (x); _v.f.reg;}) +#define REGION_OFFSET(x) ({ia64_va _v; _v.l = (long) (x); _v.f.off;}) + +#ifdef CONFIG_HUGETLB_PAGE +# define htlbpage_to_page(x) (((unsigned long) REGION_NUMBER(x) << 61) \ + | (REGION_OFFSET(x) >> (HPAGE_SHIFT-PAGE_SHIFT))) +# define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) +extern unsigned int hpage_shift; +#endif + +static __inline__ int +get_order (unsigned long size) +{ + long double d = size - 1; + long order; + + order = ia64_getf_exp(d); + order = order - PAGE_SHIFT - 0xffff + 1; + if (order < 0) + order = 0; + return order; +} + +#endif /* !__ASSEMBLY__ */ + +#ifdef STRICT_MM_TYPECHECKS + /* + * These are used to make use of C type-checking.. + */ + typedef struct { unsigned long pte; } pte_t; + typedef struct { unsigned long pmd; } pmd_t; +#if CONFIG_PGTABLE_LEVELS == 4 + typedef struct { unsigned long pud; } pud_t; +#endif + typedef struct { unsigned long pgd; } pgd_t; + typedef struct { unsigned long pgprot; } pgprot_t; + typedef struct page *pgtable_t; + +# define pte_val(x) ((x).pte) +# define pmd_val(x) ((x).pmd) +#if CONFIG_PGTABLE_LEVELS == 4 +# define pud_val(x) ((x).pud) +#endif +# define pgd_val(x) ((x).pgd) +# define pgprot_val(x) ((x).pgprot) + +# define __pte(x) ((pte_t) { (x) } ) +# define __pmd(x) ((pmd_t) { (x) } ) +# define __pgprot(x) ((pgprot_t) { (x) } ) + +#else /* !STRICT_MM_TYPECHECKS */ + /* + * .. while these make it easier on the compiler + */ +# ifndef __ASSEMBLY__ + typedef unsigned long pte_t; + typedef unsigned long pmd_t; + typedef unsigned long pgd_t; + typedef unsigned long pgprot_t; + typedef struct page *pgtable_t; +# endif + +# define pte_val(x) (x) +# define pmd_val(x) (x) +# define pgd_val(x) (x) +# define pgprot_val(x) (x) + +# define __pte(x) (x) +# define __pgd(x) (x) +# define __pgprot(x) (x) +#endif /* !STRICT_MM_TYPECHECKS */ + +#define PAGE_OFFSET RGN_BASE(RGN_KERNEL) + +#define VM_DATA_DEFAULT_FLAGS VM_DATA_FLAGS_TSK_EXEC + +#define GATE_ADDR RGN_BASE(RGN_GATE) + +/* + * 0xa000000000000000+2*PERCPU_PAGE_SIZE + * - 0xa000000000000000+3*PERCPU_PAGE_SIZE remain unmapped (guard page) + */ +#define KERNEL_START (GATE_ADDR+__IA64_UL_CONST(0x100000000)) +#define PERCPU_ADDR (-PERCPU_PAGE_SIZE) +#define LOAD_OFFSET (KERNEL_START - KERNEL_TR_PAGE_SIZE) + +#define __HAVE_ARCH_GATE_AREA 1 + +#endif /* _ASM_IA64_PAGE_H */ diff --git a/arch/ia64/include/asm/pal.h b/arch/ia64/include/asm/pal.h new file mode 100644 index 000000000000..e6b652f9e45e --- /dev/null +++ b/arch/ia64/include/asm/pal.h @@ -0,0 +1,1827 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PAL_H +#define _ASM_IA64_PAL_H + +/* + * Processor Abstraction Layer definitions. + * + * This is based on Intel IA-64 Architecture Software Developer's Manual rev 1.0 + * chapter 11 IA-64 Processor Abstraction Layer + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Srinivasa Prasad Thirumalachar + * Copyright (C) 2008 Silicon Graphics, Inc. (SGI) + * + * 99/10/01 davidm Make sure we pass zero for reserved parameters. + * 00/03/07 davidm Updated pal_cache_flush() to be in sync with PAL v2.6. + * 00/03/23 cfleck Modified processor min-state save area to match updated PAL & SAL info + * 00/05/24 eranian Updated to latest PAL spec, fix structures bugs, added + * 00/05/25 eranian Support for stack calls, and static physical calls + * 00/06/18 eranian Support for stacked physical calls + * 06/10/26 rja Support for Intel Itanium Architecture Software Developer's + * Manual Rev 2.2 (Jan 2006) + */ + +/* + * Note that some of these calls use a static-register only calling + * convention which has nothing to do with the regular calling + * convention. + */ +#define PAL_CACHE_FLUSH 1 /* flush i/d cache */ +#define PAL_CACHE_INFO 2 /* get detailed i/d cache info */ +#define PAL_CACHE_INIT 3 /* initialize i/d cache */ +#define PAL_CACHE_SUMMARY 4 /* get summary of cache hierarchy */ +#define PAL_MEM_ATTRIB 5 /* list supported memory attributes */ +#define PAL_PTCE_INFO 6 /* purge TLB info */ +#define PAL_VM_INFO 7 /* return supported virtual memory features */ +#define PAL_VM_SUMMARY 8 /* return summary on supported vm features */ +#define PAL_BUS_GET_FEATURES 9 /* return processor bus interface features settings */ +#define PAL_BUS_SET_FEATURES 10 /* set processor bus features */ +#define PAL_DEBUG_INFO 11 /* get number of debug registers */ +#define PAL_FIXED_ADDR 12 /* get fixed component of processors's directed address */ +#define PAL_FREQ_BASE 13 /* base frequency of the platform */ +#define PAL_FREQ_RATIOS 14 /* ratio of processor, bus and ITC frequency */ +#define PAL_PERF_MON_INFO 15 /* return performance monitor info */ +#define PAL_PLATFORM_ADDR 16 /* set processor interrupt block and IO port space addr */ +#define PAL_PROC_GET_FEATURES 17 /* get configurable processor features & settings */ +#define PAL_PROC_SET_FEATURES 18 /* enable/disable configurable processor features */ +#define PAL_RSE_INFO 19 /* return rse information */ +#define PAL_VERSION 20 /* return version of PAL code */ +#define PAL_MC_CLEAR_LOG 21 /* clear all processor log info */ +#define PAL_MC_DRAIN 22 /* drain operations which could result in an MCA */ +#define PAL_MC_EXPECTED 23 /* set/reset expected MCA indicator */ +#define PAL_MC_DYNAMIC_STATE 24 /* get processor dynamic state */ +#define PAL_MC_ERROR_INFO 25 /* get processor MCA info and static state */ +#define PAL_MC_RESUME 26 /* Return to interrupted process */ +#define PAL_MC_REGISTER_MEM 27 /* Register memory for PAL to use during MCAs and inits */ +#define PAL_HALT 28 /* enter the low power HALT state */ +#define PAL_HALT_LIGHT 29 /* enter the low power light halt state*/ +#define PAL_COPY_INFO 30 /* returns info needed to relocate PAL */ +#define PAL_CACHE_LINE_INIT 31 /* init tags & data of cache line */ +#define PAL_PMI_ENTRYPOINT 32 /* register PMI memory entry points with the processor */ +#define PAL_ENTER_IA_32_ENV 33 /* enter IA-32 system environment */ +#define PAL_VM_PAGE_SIZE 34 /* return vm TC and page walker page sizes */ + +#define PAL_MEM_FOR_TEST 37 /* get amount of memory needed for late processor test */ +#define PAL_CACHE_PROT_INFO 38 /* get i/d cache protection info */ +#define PAL_REGISTER_INFO 39 /* return AR and CR register information*/ +#define PAL_SHUTDOWN 40 /* enter processor shutdown state */ +#define PAL_PREFETCH_VISIBILITY 41 /* Make Processor Prefetches Visible */ +#define PAL_LOGICAL_TO_PHYSICAL 42 /* returns information on logical to physical processor mapping */ +#define PAL_CACHE_SHARED_INFO 43 /* returns information on caches shared by logical processor */ +#define PAL_GET_HW_POLICY 48 /* Get current hardware resource sharing policy */ +#define PAL_SET_HW_POLICY 49 /* Set current hardware resource sharing policy */ +#define PAL_VP_INFO 50 /* Information about virtual processor features */ +#define PAL_MC_HW_TRACKING 51 /* Hardware tracking status */ + +#define PAL_COPY_PAL 256 /* relocate PAL procedures and PAL PMI */ +#define PAL_HALT_INFO 257 /* return the low power capabilities of processor */ +#define PAL_TEST_PROC 258 /* perform late processor self-test */ +#define PAL_CACHE_READ 259 /* read tag & data of cacheline for diagnostic testing */ +#define PAL_CACHE_WRITE 260 /* write tag & data of cacheline for diagnostic testing */ +#define PAL_VM_TR_READ 261 /* read contents of translation register */ +#define PAL_GET_PSTATE 262 /* get the current P-state */ +#define PAL_SET_PSTATE 263 /* set the P-state */ +#define PAL_BRAND_INFO 274 /* Processor branding information */ + +#define PAL_GET_PSTATE_TYPE_LASTSET 0 +#define PAL_GET_PSTATE_TYPE_AVGANDRESET 1 +#define PAL_GET_PSTATE_TYPE_AVGNORESET 2 +#define PAL_GET_PSTATE_TYPE_INSTANT 3 + +#define PAL_MC_ERROR_INJECT 276 /* Injects processor error or returns injection capabilities */ + +#ifndef __ASSEMBLY__ + +#include +#include +#include + +/* + * Data types needed to pass information into PAL procedures and + * interpret information returned by them. + */ + +/* Return status from the PAL procedure */ +typedef s64 pal_status_t; + +#define PAL_STATUS_SUCCESS 0 /* No error */ +#define PAL_STATUS_UNIMPLEMENTED (-1) /* Unimplemented procedure */ +#define PAL_STATUS_EINVAL (-2) /* Invalid argument */ +#define PAL_STATUS_ERROR (-3) /* Error */ +#define PAL_STATUS_CACHE_INIT_FAIL (-4) /* Could not initialize the + * specified level and type of + * cache without sideeffects + * and "restrict" was 1 + */ +#define PAL_STATUS_REQUIRES_MEMORY (-9) /* Call requires PAL memory buffer */ + +/* Processor cache level in the hierarchy */ +typedef u64 pal_cache_level_t; +#define PAL_CACHE_LEVEL_L0 0 /* L0 */ +#define PAL_CACHE_LEVEL_L1 1 /* L1 */ +#define PAL_CACHE_LEVEL_L2 2 /* L2 */ + + +/* Processor cache type at a particular level in the hierarchy */ + +typedef u64 pal_cache_type_t; +#define PAL_CACHE_TYPE_INSTRUCTION 1 /* Instruction cache */ +#define PAL_CACHE_TYPE_DATA 2 /* Data or unified cache */ +#define PAL_CACHE_TYPE_INSTRUCTION_DATA 3 /* Both Data & Instruction */ + + +#define PAL_CACHE_FLUSH_INVALIDATE 1 /* Invalidate clean lines */ +#define PAL_CACHE_FLUSH_CHK_INTRS 2 /* check for interrupts/mc while flushing */ + +/* Processor cache line size in bytes */ +typedef int pal_cache_line_size_t; + +/* Processor cache line state */ +typedef u64 pal_cache_line_state_t; +#define PAL_CACHE_LINE_STATE_INVALID 0 /* Invalid */ +#define PAL_CACHE_LINE_STATE_SHARED 1 /* Shared */ +#define PAL_CACHE_LINE_STATE_EXCLUSIVE 2 /* Exclusive */ +#define PAL_CACHE_LINE_STATE_MODIFIED 3 /* Modified */ + +typedef struct pal_freq_ratio { + u32 den, num; /* numerator & denominator */ +} itc_ratio, proc_ratio; + +typedef union pal_cache_config_info_1_s { + struct { + u64 u : 1, /* 0 Unified cache ? */ + at : 2, /* 2-1 Cache mem attr*/ + reserved : 5, /* 7-3 Reserved */ + associativity : 8, /* 16-8 Associativity*/ + line_size : 8, /* 23-17 Line size */ + stride : 8, /* 31-24 Stride */ + store_latency : 8, /*39-32 Store latency*/ + load_latency : 8, /* 47-40 Load latency*/ + store_hints : 8, /* 55-48 Store hints*/ + load_hints : 8; /* 63-56 Load hints */ + } pcci1_bits; + u64 pcci1_data; +} pal_cache_config_info_1_t; + +typedef union pal_cache_config_info_2_s { + struct { + u32 cache_size; /*cache size in bytes*/ + + + u32 alias_boundary : 8, /* 39-32 aliased addr + * separation for max + * performance. + */ + tag_ls_bit : 8, /* 47-40 LSb of addr*/ + tag_ms_bit : 8, /* 55-48 MSb of addr*/ + reserved : 8; /* 63-56 Reserved */ + } pcci2_bits; + u64 pcci2_data; +} pal_cache_config_info_2_t; + + +typedef struct pal_cache_config_info_s { + pal_status_t pcci_status; + pal_cache_config_info_1_t pcci_info_1; + pal_cache_config_info_2_t pcci_info_2; + u64 pcci_reserved; +} pal_cache_config_info_t; + +#define pcci_ld_hints pcci_info_1.pcci1_bits.load_hints +#define pcci_st_hints pcci_info_1.pcci1_bits.store_hints +#define pcci_ld_latency pcci_info_1.pcci1_bits.load_latency +#define pcci_st_latency pcci_info_1.pcci1_bits.store_latency +#define pcci_stride pcci_info_1.pcci1_bits.stride +#define pcci_line_size pcci_info_1.pcci1_bits.line_size +#define pcci_assoc pcci_info_1.pcci1_bits.associativity +#define pcci_cache_attr pcci_info_1.pcci1_bits.at +#define pcci_unified pcci_info_1.pcci1_bits.u +#define pcci_tag_msb pcci_info_2.pcci2_bits.tag_ms_bit +#define pcci_tag_lsb pcci_info_2.pcci2_bits.tag_ls_bit +#define pcci_alias_boundary pcci_info_2.pcci2_bits.alias_boundary +#define pcci_cache_size pcci_info_2.pcci2_bits.cache_size + + + +/* Possible values for cache attributes */ + +#define PAL_CACHE_ATTR_WT 0 /* Write through cache */ +#define PAL_CACHE_ATTR_WB 1 /* Write back cache */ +#define PAL_CACHE_ATTR_WT_OR_WB 2 /* Either write thru or write + * back depending on TLB + * memory attributes + */ + + +/* Possible values for cache hints */ + +#define PAL_CACHE_HINT_TEMP_1 0 /* Temporal level 1 */ +#define PAL_CACHE_HINT_NTEMP_1 1 /* Non-temporal level 1 */ +#define PAL_CACHE_HINT_NTEMP_ALL 3 /* Non-temporal all levels */ + +/* Processor cache protection information */ +typedef union pal_cache_protection_element_u { + u32 pcpi_data; + struct { + u32 data_bits : 8, /* # data bits covered by + * each unit of protection + */ + + tagprot_lsb : 6, /* Least -do- */ + tagprot_msb : 6, /* Most Sig. tag address + * bit that this + * protection covers. + */ + prot_bits : 6, /* # of protection bits */ + method : 4, /* Protection method */ + t_d : 2; /* Indicates which part + * of the cache this + * protection encoding + * applies. + */ + } pcp_info; +} pal_cache_protection_element_t; + +#define pcpi_cache_prot_part pcp_info.t_d +#define pcpi_prot_method pcp_info.method +#define pcpi_prot_bits pcp_info.prot_bits +#define pcpi_tagprot_msb pcp_info.tagprot_msb +#define pcpi_tagprot_lsb pcp_info.tagprot_lsb +#define pcpi_data_bits pcp_info.data_bits + +/* Processor cache part encodings */ +#define PAL_CACHE_PROT_PART_DATA 0 /* Data protection */ +#define PAL_CACHE_PROT_PART_TAG 1 /* Tag protection */ +#define PAL_CACHE_PROT_PART_TAG_DATA 2 /* Tag+data protection (tag is + * more significant ) + */ +#define PAL_CACHE_PROT_PART_DATA_TAG 3 /* Data+tag protection (data is + * more significant ) + */ +#define PAL_CACHE_PROT_PART_MAX 6 + + +typedef struct pal_cache_protection_info_s { + pal_status_t pcpi_status; + pal_cache_protection_element_t pcp_info[PAL_CACHE_PROT_PART_MAX]; +} pal_cache_protection_info_t; + + +/* Processor cache protection method encodings */ +#define PAL_CACHE_PROT_METHOD_NONE 0 /* No protection */ +#define PAL_CACHE_PROT_METHOD_ODD_PARITY 1 /* Odd parity */ +#define PAL_CACHE_PROT_METHOD_EVEN_PARITY 2 /* Even parity */ +#define PAL_CACHE_PROT_METHOD_ECC 3 /* ECC protection */ + + +/* Processor cache line identification in the hierarchy */ +typedef union pal_cache_line_id_u { + u64 pclid_data; + struct { + u64 cache_type : 8, /* 7-0 cache type */ + level : 8, /* 15-8 level of the + * cache in the + * hierarchy. + */ + way : 8, /* 23-16 way in the set + */ + part : 8, /* 31-24 part of the + * cache + */ + reserved : 32; /* 63-32 is reserved*/ + } pclid_info_read; + struct { + u64 cache_type : 8, /* 7-0 cache type */ + level : 8, /* 15-8 level of the + * cache in the + * hierarchy. + */ + way : 8, /* 23-16 way in the set + */ + part : 8, /* 31-24 part of the + * cache + */ + mesi : 8, /* 39-32 cache line + * state + */ + start : 8, /* 47-40 lsb of data to + * invert + */ + length : 8, /* 55-48 #bits to + * invert + */ + trigger : 8; /* 63-56 Trigger error + * by doing a load + * after the write + */ + + } pclid_info_write; +} pal_cache_line_id_u_t; + +#define pclid_read_part pclid_info_read.part +#define pclid_read_way pclid_info_read.way +#define pclid_read_level pclid_info_read.level +#define pclid_read_cache_type pclid_info_read.cache_type + +#define pclid_write_trigger pclid_info_write.trigger +#define pclid_write_length pclid_info_write.length +#define pclid_write_start pclid_info_write.start +#define pclid_write_mesi pclid_info_write.mesi +#define pclid_write_part pclid_info_write.part +#define pclid_write_way pclid_info_write.way +#define pclid_write_level pclid_info_write.level +#define pclid_write_cache_type pclid_info_write.cache_type + +/* Processor cache line part encodings */ +#define PAL_CACHE_LINE_ID_PART_DATA 0 /* Data */ +#define PAL_CACHE_LINE_ID_PART_TAG 1 /* Tag */ +#define PAL_CACHE_LINE_ID_PART_DATA_PROT 2 /* Data protection */ +#define PAL_CACHE_LINE_ID_PART_TAG_PROT 3 /* Tag protection */ +#define PAL_CACHE_LINE_ID_PART_DATA_TAG_PROT 4 /* Data+tag + * protection + */ +typedef struct pal_cache_line_info_s { + pal_status_t pcli_status; /* Return status of the read cache line + * info call. + */ + u64 pcli_data; /* 64-bit data, tag, protection bits .. */ + u64 pcli_data_len; /* data length in bits */ + pal_cache_line_state_t pcli_cache_line_state; /* mesi state */ + +} pal_cache_line_info_t; + + +/* Machine Check related crap */ + +/* Pending event status bits */ +typedef u64 pal_mc_pending_events_t; + +#define PAL_MC_PENDING_MCA (1 << 0) +#define PAL_MC_PENDING_INIT (1 << 1) + +/* Error information type */ +typedef u64 pal_mc_info_index_t; + +#define PAL_MC_INFO_PROCESSOR 0 /* Processor */ +#define PAL_MC_INFO_CACHE_CHECK 1 /* Cache check */ +#define PAL_MC_INFO_TLB_CHECK 2 /* Tlb check */ +#define PAL_MC_INFO_BUS_CHECK 3 /* Bus check */ +#define PAL_MC_INFO_REQ_ADDR 4 /* Requestor address */ +#define PAL_MC_INFO_RESP_ADDR 5 /* Responder address */ +#define PAL_MC_INFO_TARGET_ADDR 6 /* Target address */ +#define PAL_MC_INFO_IMPL_DEP 7 /* Implementation + * dependent + */ + +#define PAL_TLB_CHECK_OP_PURGE 8 + +typedef struct pal_process_state_info_s { + u64 reserved1 : 2, + rz : 1, /* PAL_CHECK processor + * rendezvous + * successful. + */ + + ra : 1, /* PAL_CHECK attempted + * a rendezvous. + */ + me : 1, /* Distinct multiple + * errors occurred + */ + + mn : 1, /* Min. state save + * area has been + * registered with PAL + */ + + sy : 1, /* Storage integrity + * synched + */ + + + co : 1, /* Continuable */ + ci : 1, /* MC isolated */ + us : 1, /* Uncontained storage + * damage. + */ + + + hd : 1, /* Non-essential hw + * lost (no loss of + * functionality) + * causing the + * processor to run in + * degraded mode. + */ + + tl : 1, /* 1 => MC occurred + * after an instr was + * executed but before + * the trap that + * resulted from instr + * execution was + * generated. + * (Trap Lost ) + */ + mi : 1, /* More information available + * call PAL_MC_ERROR_INFO + */ + pi : 1, /* Precise instruction pointer */ + pm : 1, /* Precise min-state save area */ + + dy : 1, /* Processor dynamic + * state valid + */ + + + in : 1, /* 0 = MC, 1 = INIT */ + rs : 1, /* RSE valid */ + cm : 1, /* MC corrected */ + ex : 1, /* MC is expected */ + cr : 1, /* Control regs valid*/ + pc : 1, /* Perf cntrs valid */ + dr : 1, /* Debug regs valid */ + tr : 1, /* Translation regs + * valid + */ + rr : 1, /* Region regs valid */ + ar : 1, /* App regs valid */ + br : 1, /* Branch regs valid */ + pr : 1, /* Predicate registers + * valid + */ + + fp : 1, /* fp registers valid*/ + b1 : 1, /* Preserved bank one + * general registers + * are valid + */ + b0 : 1, /* Preserved bank zero + * general registers + * are valid + */ + gr : 1, /* General registers + * are valid + * (excl. banked regs) + */ + dsize : 16, /* size of dynamic + * state returned + * by the processor + */ + + se : 1, /* Shared error. MCA in a + shared structure */ + reserved2 : 10, + cc : 1, /* Cache check */ + tc : 1, /* TLB check */ + bc : 1, /* Bus check */ + rc : 1, /* Register file check */ + uc : 1; /* Uarch check */ + +} pal_processor_state_info_t; + +typedef struct pal_cache_check_info_s { + u64 op : 4, /* Type of cache + * operation that + * caused the machine + * check. + */ + level : 2, /* Cache level */ + reserved1 : 2, + dl : 1, /* Failure in data part + * of cache line + */ + tl : 1, /* Failure in tag part + * of cache line + */ + dc : 1, /* Failure in dcache */ + ic : 1, /* Failure in icache */ + mesi : 3, /* Cache line state */ + mv : 1, /* mesi valid */ + way : 5, /* Way in which the + * error occurred + */ + wiv : 1, /* Way field valid */ + reserved2 : 1, + dp : 1, /* Data poisoned on MBE */ + reserved3 : 6, + hlth : 2, /* Health indicator */ + + index : 20, /* Cache line index */ + reserved4 : 2, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_cache_check_info_t; + +typedef struct pal_tlb_check_info_s { + + u64 tr_slot : 8, /* Slot# of TR where + * error occurred + */ + trv : 1, /* tr_slot field is valid */ + reserved1 : 1, + level : 2, /* TLB level where failure occurred */ + reserved2 : 4, + dtr : 1, /* Fail in data TR */ + itr : 1, /* Fail in inst TR */ + dtc : 1, /* Fail in data TC */ + itc : 1, /* Fail in inst. TC */ + op : 4, /* Cache operation */ + reserved3 : 6, + hlth : 2, /* Health indicator */ + reserved4 : 22, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_tlb_check_info_t; + +typedef struct pal_bus_check_info_s { + u64 size : 5, /* Xaction size */ + ib : 1, /* Internal bus error */ + eb : 1, /* External bus error */ + cc : 1, /* Error occurred + * during cache-cache + * transfer. + */ + type : 8, /* Bus xaction type*/ + sev : 5, /* Bus error severity*/ + hier : 2, /* Bus hierarchy level */ + dp : 1, /* Data poisoned on MBE */ + bsi : 8, /* Bus error status + * info + */ + reserved2 : 22, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_bus_check_info_t; + +typedef struct pal_reg_file_check_info_s { + u64 id : 4, /* Register file identifier */ + op : 4, /* Type of register + * operation that + * caused the machine + * check. + */ + reg_num : 7, /* Register number */ + rnv : 1, /* reg_num valid */ + reserved2 : 38, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + reserved3 : 3, + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_reg_file_check_info_t; + +typedef struct pal_uarch_check_info_s { + u64 sid : 5, /* Structure identification */ + level : 3, /* Level of failure */ + array_id : 4, /* Array identification */ + op : 4, /* Type of + * operation that + * caused the machine + * check. + */ + way : 6, /* Way of structure */ + wv : 1, /* way valid */ + xv : 1, /* index valid */ + reserved1 : 6, + hlth : 2, /* Health indicator */ + index : 8, /* Index or set of the uarch + * structure that failed. + */ + reserved2 : 24, + + is : 1, /* instruction set (1 == ia32) */ + iv : 1, /* instruction set field valid */ + pl : 2, /* privilege level */ + pv : 1, /* privilege level field valid */ + mcc : 1, /* Machine check corrected */ + tv : 1, /* Target address + * structure is valid + */ + rq : 1, /* Requester identifier + * structure is valid + */ + rp : 1, /* Responder identifier + * structure is valid + */ + pi : 1; /* Precise instruction pointer + * structure is valid + */ +} pal_uarch_check_info_t; + +typedef union pal_mc_error_info_u { + u64 pmei_data; + pal_processor_state_info_t pme_processor; + pal_cache_check_info_t pme_cache; + pal_tlb_check_info_t pme_tlb; + pal_bus_check_info_t pme_bus; + pal_reg_file_check_info_t pme_reg_file; + pal_uarch_check_info_t pme_uarch; +} pal_mc_error_info_t; + +#define pmci_proc_unknown_check pme_processor.uc +#define pmci_proc_bus_check pme_processor.bc +#define pmci_proc_tlb_check pme_processor.tc +#define pmci_proc_cache_check pme_processor.cc +#define pmci_proc_dynamic_state_size pme_processor.dsize +#define pmci_proc_gpr_valid pme_processor.gr +#define pmci_proc_preserved_bank0_gpr_valid pme_processor.b0 +#define pmci_proc_preserved_bank1_gpr_valid pme_processor.b1 +#define pmci_proc_fp_valid pme_processor.fp +#define pmci_proc_predicate_regs_valid pme_processor.pr +#define pmci_proc_branch_regs_valid pme_processor.br +#define pmci_proc_app_regs_valid pme_processor.ar +#define pmci_proc_region_regs_valid pme_processor.rr +#define pmci_proc_translation_regs_valid pme_processor.tr +#define pmci_proc_debug_regs_valid pme_processor.dr +#define pmci_proc_perf_counters_valid pme_processor.pc +#define pmci_proc_control_regs_valid pme_processor.cr +#define pmci_proc_machine_check_expected pme_processor.ex +#define pmci_proc_machine_check_corrected pme_processor.cm +#define pmci_proc_rse_valid pme_processor.rs +#define pmci_proc_machine_check_or_init pme_processor.in +#define pmci_proc_dynamic_state_valid pme_processor.dy +#define pmci_proc_operation pme_processor.op +#define pmci_proc_trap_lost pme_processor.tl +#define pmci_proc_hardware_damage pme_processor.hd +#define pmci_proc_uncontained_storage_damage pme_processor.us +#define pmci_proc_machine_check_isolated pme_processor.ci +#define pmci_proc_continuable pme_processor.co +#define pmci_proc_storage_intergrity_synced pme_processor.sy +#define pmci_proc_min_state_save_area_regd pme_processor.mn +#define pmci_proc_distinct_multiple_errors pme_processor.me +#define pmci_proc_pal_attempted_rendezvous pme_processor.ra +#define pmci_proc_pal_rendezvous_complete pme_processor.rz + + +#define pmci_cache_level pme_cache.level +#define pmci_cache_line_state pme_cache.mesi +#define pmci_cache_line_state_valid pme_cache.mv +#define pmci_cache_line_index pme_cache.index +#define pmci_cache_instr_cache_fail pme_cache.ic +#define pmci_cache_data_cache_fail pme_cache.dc +#define pmci_cache_line_tag_fail pme_cache.tl +#define pmci_cache_line_data_fail pme_cache.dl +#define pmci_cache_operation pme_cache.op +#define pmci_cache_way_valid pme_cache.wv +#define pmci_cache_target_address_valid pme_cache.tv +#define pmci_cache_way pme_cache.way +#define pmci_cache_mc pme_cache.mc + +#define pmci_tlb_instr_translation_cache_fail pme_tlb.itc +#define pmci_tlb_data_translation_cache_fail pme_tlb.dtc +#define pmci_tlb_instr_translation_reg_fail pme_tlb.itr +#define pmci_tlb_data_translation_reg_fail pme_tlb.dtr +#define pmci_tlb_translation_reg_slot pme_tlb.tr_slot +#define pmci_tlb_mc pme_tlb.mc + +#define pmci_bus_status_info pme_bus.bsi +#define pmci_bus_req_address_valid pme_bus.rq +#define pmci_bus_resp_address_valid pme_bus.rp +#define pmci_bus_target_address_valid pme_bus.tv +#define pmci_bus_error_severity pme_bus.sev +#define pmci_bus_transaction_type pme_bus.type +#define pmci_bus_cache_cache_transfer pme_bus.cc +#define pmci_bus_transaction_size pme_bus.size +#define pmci_bus_internal_error pme_bus.ib +#define pmci_bus_external_error pme_bus.eb +#define pmci_bus_mc pme_bus.mc + +/* + * NOTE: this min_state_save area struct only includes the 1KB + * architectural state save area. The other 3 KB is scratch space + * for PAL. + */ + +struct pal_min_state_area { + u64 pmsa_nat_bits; /* nat bits for saved GRs */ + u64 pmsa_gr[15]; /* GR1 - GR15 */ + u64 pmsa_bank0_gr[16]; /* GR16 - GR31 */ + u64 pmsa_bank1_gr[16]; /* GR16 - GR31 */ + u64 pmsa_pr; /* predicate registers */ + u64 pmsa_br0; /* branch register 0 */ + u64 pmsa_rsc; /* ar.rsc */ + u64 pmsa_iip; /* cr.iip */ + u64 pmsa_ipsr; /* cr.ipsr */ + u64 pmsa_ifs; /* cr.ifs */ + u64 pmsa_xip; /* previous iip */ + u64 pmsa_xpsr; /* previous psr */ + u64 pmsa_xfs; /* previous ifs */ + u64 pmsa_br1; /* branch register 1 */ + u64 pmsa_reserved[70]; /* pal_min_state_area should total to 1KB */ +}; + + +struct ia64_pal_retval { + /* + * A zero status value indicates call completed without error. + * A negative status value indicates reason of call failure. + * A positive status value indicates success but an + * informational value should be printed (e.g., "reboot for + * change to take effect"). + */ + s64 status; + u64 v0; + u64 v1; + u64 v2; +}; + +/* + * Note: Currently unused PAL arguments are generally labeled + * "reserved" so the value specified in the PAL documentation + * (generally 0) MUST be passed. Reserved parameters are not optional + * parameters. + */ +extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); +extern void ia64_save_scratch_fpregs (struct ia64_fpreg *); +extern void ia64_load_scratch_fpregs (struct ia64_fpreg *); + +#define PAL_CALL(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_static(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +#define PAL_CALL_PHYS_STK(iprv,a0,a1,a2,a3) do { \ + struct ia64_fpreg fr[6]; \ + ia64_save_scratch_fpregs(fr); \ + iprv = ia64_pal_call_phys_stacked(a0, a1, a2, a3); \ + ia64_load_scratch_fpregs(fr); \ +} while (0) + +typedef int (*ia64_pal_handler) (u64, ...); +extern ia64_pal_handler ia64_pal; +extern void ia64_pal_handler_init (void *); + +extern ia64_pal_handler ia64_pal; + +extern pal_cache_config_info_t l0d_cache_config_info; +extern pal_cache_config_info_t l0i_cache_config_info; +extern pal_cache_config_info_t l1_cache_config_info; +extern pal_cache_config_info_t l2_cache_config_info; + +extern pal_cache_protection_info_t l0d_cache_protection_info; +extern pal_cache_protection_info_t l0i_cache_protection_info; +extern pal_cache_protection_info_t l1_cache_protection_info; +extern pal_cache_protection_info_t l2_cache_protection_info; + +extern pal_cache_config_info_t pal_cache_config_info_get(pal_cache_level_t, + pal_cache_type_t); + +extern pal_cache_protection_info_t pal_cache_protection_info_get(pal_cache_level_t, + pal_cache_type_t); + + +extern void pal_error(int); + + +/* Useful wrappers for the current list of pal procedures */ + +typedef union pal_bus_features_u { + u64 pal_bus_features_val; + struct { + u64 pbf_reserved1 : 29; + u64 pbf_req_bus_parking : 1; + u64 pbf_bus_lock_mask : 1; + u64 pbf_enable_half_xfer_rate : 1; + u64 pbf_reserved2 : 20; + u64 pbf_enable_shared_line_replace : 1; + u64 pbf_enable_exclusive_line_replace : 1; + u64 pbf_disable_xaction_queueing : 1; + u64 pbf_disable_resp_err_check : 1; + u64 pbf_disable_berr_check : 1; + u64 pbf_disable_bus_req_internal_err_signal : 1; + u64 pbf_disable_bus_req_berr_signal : 1; + u64 pbf_disable_bus_init_event_check : 1; + u64 pbf_disable_bus_init_event_signal : 1; + u64 pbf_disable_bus_addr_err_check : 1; + u64 pbf_disable_bus_addr_err_signal : 1; + u64 pbf_disable_bus_data_err_check : 1; + } pal_bus_features_s; +} pal_bus_features_u_t; + +extern void pal_bus_features_print (u64); + +/* Provide information about configurable processor bus features */ +static inline s64 +ia64_pal_bus_get_features (pal_bus_features_u_t *features_avail, + pal_bus_features_u_t *features_status, + pal_bus_features_u_t *features_control) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_BUS_GET_FEATURES, 0, 0, 0); + if (features_avail) + features_avail->pal_bus_features_val = iprv.v0; + if (features_status) + features_status->pal_bus_features_val = iprv.v1; + if (features_control) + features_control->pal_bus_features_val = iprv.v2; + return iprv.status; +} + +/* Enables/disables specific processor bus features */ +static inline s64 +ia64_pal_bus_set_features (pal_bus_features_u_t feature_select) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_BUS_SET_FEATURES, feature_select.pal_bus_features_val, 0, 0); + return iprv.status; +} + +/* Get detailed cache information */ +static inline s64 +ia64_pal_cache_config_info (u64 cache_level, u64 cache_type, pal_cache_config_info_t *conf) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_INFO, cache_level, cache_type, 0); + + if (iprv.status == 0) { + conf->pcci_status = iprv.status; + conf->pcci_info_1.pcci1_data = iprv.v0; + conf->pcci_info_2.pcci2_data = iprv.v1; + conf->pcci_reserved = iprv.v2; + } + return iprv.status; + +} + +/* Get detailed cche protection information */ +static inline s64 +ia64_pal_cache_prot_info (u64 cache_level, u64 cache_type, pal_cache_protection_info_t *prot) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_PROT_INFO, cache_level, cache_type, 0); + + if (iprv.status == 0) { + prot->pcpi_status = iprv.status; + prot->pcp_info[0].pcpi_data = iprv.v0 & 0xffffffff; + prot->pcp_info[1].pcpi_data = iprv.v0 >> 32; + prot->pcp_info[2].pcpi_data = iprv.v1 & 0xffffffff; + prot->pcp_info[3].pcpi_data = iprv.v1 >> 32; + prot->pcp_info[4].pcpi_data = iprv.v2 & 0xffffffff; + prot->pcp_info[5].pcpi_data = iprv.v2 >> 32; + } + return iprv.status; +} + +/* + * Flush the processor instruction or data caches. *PROGRESS must be + * initialized to zero before calling this for the first time.. + */ +static inline s64 +ia64_pal_cache_flush (u64 cache_type, u64 invalidate, u64 *progress, u64 *vector) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_FLUSH, cache_type, invalidate, *progress); + if (vector) + *vector = iprv.v0; + *progress = iprv.v1; + return iprv.status; +} + + +/* Initialize the processor controlled caches */ +static inline s64 +ia64_pal_cache_init (u64 level, u64 cache_type, u64 rest) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_INIT, level, cache_type, rest); + return iprv.status; +} + +/* Initialize the tags and data of a data or unified cache line of + * processor controlled cache to known values without the availability + * of backing memory. + */ +static inline s64 +ia64_pal_cache_line_init (u64 physical_addr, u64 data_value) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_LINE_INIT, physical_addr, data_value, 0); + return iprv.status; +} + + +/* Read the data and tag of a processor controlled cache line for diags */ +static inline s64 +ia64_pal_cache_read (pal_cache_line_id_u_t line_id, u64 physical_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_CACHE_READ, line_id.pclid_data, + physical_addr, 0); + return iprv.status; +} + +/* Return summary information about the hierarchy of caches controlled by the processor */ +static inline long ia64_pal_cache_summary(unsigned long *cache_levels, + unsigned long *unique_caches) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_CACHE_SUMMARY, 0, 0, 0); + if (cache_levels) + *cache_levels = iprv.v0; + if (unique_caches) + *unique_caches = iprv.v1; + return iprv.status; +} + +/* Write the data and tag of a processor-controlled cache line for diags */ +static inline s64 +ia64_pal_cache_write (pal_cache_line_id_u_t line_id, u64 physical_addr, u64 data) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_CACHE_WRITE, line_id.pclid_data, + physical_addr, data); + return iprv.status; +} + + +/* Return the parameters needed to copy relocatable PAL procedures from ROM to memory */ +static inline s64 +ia64_pal_copy_info (u64 copy_type, u64 num_procs, u64 num_iopics, + u64 *buffer_size, u64 *buffer_align) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_COPY_INFO, copy_type, num_procs, num_iopics); + if (buffer_size) + *buffer_size = iprv.v0; + if (buffer_align) + *buffer_align = iprv.v1; + return iprv.status; +} + +/* Copy relocatable PAL procedures from ROM to memory */ +static inline s64 +ia64_pal_copy_pal (u64 target_addr, u64 alloc_size, u64 processor, u64 *pal_proc_offset) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_COPY_PAL, target_addr, alloc_size, processor); + if (pal_proc_offset) + *pal_proc_offset = iprv.v0; + return iprv.status; +} + +/* Return the number of instruction and data debug register pairs */ +static inline long ia64_pal_debug_info(unsigned long *inst_regs, + unsigned long *data_regs) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_DEBUG_INFO, 0, 0, 0); + if (inst_regs) + *inst_regs = iprv.v0; + if (data_regs) + *data_regs = iprv.v1; + + return iprv.status; +} + +#ifdef TBD +/* Switch from IA64-system environment to IA-32 system environment */ +static inline s64 +ia64_pal_enter_ia32_env (ia32_env1, ia32_env2, ia32_env3) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_ENTER_IA_32_ENV, ia32_env1, ia32_env2, ia32_env3); + return iprv.status; +} +#endif + +/* Get unique geographical address of this processor on its bus */ +static inline s64 +ia64_pal_fixed_addr (u64 *global_unique_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_FIXED_ADDR, 0, 0, 0); + if (global_unique_addr) + *global_unique_addr = iprv.v0; + return iprv.status; +} + +/* Get base frequency of the platform if generated by the processor */ +static inline long ia64_pal_freq_base(unsigned long *platform_base_freq) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_FREQ_BASE, 0, 0, 0); + if (platform_base_freq) + *platform_base_freq = iprv.v0; + return iprv.status; +} + +/* + * Get the ratios for processor frequency, bus frequency and interval timer to + * the base frequency of the platform + */ +static inline s64 +ia64_pal_freq_ratios (struct pal_freq_ratio *proc_ratio, struct pal_freq_ratio *bus_ratio, + struct pal_freq_ratio *itc_ratio) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_FREQ_RATIOS, 0, 0, 0); + if (proc_ratio) + *(u64 *)proc_ratio = iprv.v0; + if (bus_ratio) + *(u64 *)bus_ratio = iprv.v1; + if (itc_ratio) + *(u64 *)itc_ratio = iprv.v2; + return iprv.status; +} + +/* + * Get the current hardware resource sharing policy of the processor + */ +static inline s64 +ia64_pal_get_hw_policy (u64 proc_num, u64 *cur_policy, u64 *num_impacted, + u64 *la) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_GET_HW_POLICY, proc_num, 0, 0); + if (cur_policy) + *cur_policy = iprv.v0; + if (num_impacted) + *num_impacted = iprv.v1; + if (la) + *la = iprv.v2; + return iprv.status; +} + +/* Make the processor enter HALT or one of the implementation dependent low + * power states where prefetching and execution are suspended and cache and + * TLB coherency is not maintained. + */ +static inline s64 +ia64_pal_halt (u64 halt_state) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_HALT, halt_state, 0, 0); + return iprv.status; +} + +typedef union pal_power_mgmt_info_u { + u64 ppmi_data; + struct { + u64 exit_latency : 16, + entry_latency : 16, + power_consumption : 28, + im : 1, + co : 1, + reserved : 2; + } pal_power_mgmt_info_s; +} pal_power_mgmt_info_u_t; + +/* Return information about processor's optional power management capabilities. */ +static inline s64 +ia64_pal_halt_info (pal_power_mgmt_info_u_t *power_buf) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_HALT_INFO, (unsigned long) power_buf, 0, 0); + return iprv.status; +} + +/* Get the current P-state information */ +static inline s64 +ia64_pal_get_pstate (u64 *pstate_index, unsigned long type) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_GET_PSTATE, type, 0, 0); + *pstate_index = iprv.v0; + return iprv.status; +} + +/* Set the P-state */ +static inline s64 +ia64_pal_set_pstate (u64 pstate_index) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_SET_PSTATE, pstate_index, 0, 0); + return iprv.status; +} + +/* Processor branding information*/ +static inline s64 +ia64_pal_get_brand_info (char *brand_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_BRAND_INFO, 0, (u64)brand_info, 0); + return iprv.status; +} + +/* Cause the processor to enter LIGHT HALT state, where prefetching and execution are + * suspended, but cache and TLB coherency is maintained. + */ +static inline s64 +ia64_pal_halt_light (void) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_HALT_LIGHT, 0, 0, 0); + return iprv.status; +} + +/* Clear all the processor error logging registers and reset the indicator that allows + * the error logging registers to be written. This procedure also checks the pending + * machine check bit and pending INIT bit and reports their states. + */ +static inline s64 +ia64_pal_mc_clear_log (u64 *pending_vector) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_CLEAR_LOG, 0, 0, 0); + if (pending_vector) + *pending_vector = iprv.v0; + return iprv.status; +} + +/* Ensure that all outstanding transactions in a processor are completed or that any + * MCA due to thes outstanding transaction is taken. + */ +static inline s64 +ia64_pal_mc_drain (void) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_DRAIN, 0, 0, 0); + return iprv.status; +} + +/* Return the machine check dynamic processor state */ +static inline s64 +ia64_pal_mc_dynamic_state (u64 info_type, u64 dy_buffer, u64 *size) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_DYNAMIC_STATE, info_type, dy_buffer, 0); + if (size) + *size = iprv.v0; + return iprv.status; +} + +/* Return processor machine check information */ +static inline s64 +ia64_pal_mc_error_info (u64 info_index, u64 type_index, u64 *size, u64 *error_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_ERROR_INFO, info_index, type_index, 0); + if (size) + *size = iprv.v0; + if (error_info) + *error_info = iprv.v1; + return iprv.status; +} + +/* Injects the requested processor error or returns info on + * supported injection capabilities for current processor implementation + */ +static inline s64 +ia64_pal_mc_error_inject_phys (u64 err_type_info, u64 err_struct_info, + u64 err_data_buffer, u64 *capabilities, u64 *resources) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, + err_struct_info, err_data_buffer); + if (capabilities) + *capabilities= iprv.v0; + if (resources) + *resources= iprv.v1; + return iprv.status; +} + +static inline s64 +ia64_pal_mc_error_inject_virt (u64 err_type_info, u64 err_struct_info, + u64 err_data_buffer, u64 *capabilities, u64 *resources) +{ + struct ia64_pal_retval iprv; + PAL_CALL_STK(iprv, PAL_MC_ERROR_INJECT, err_type_info, + err_struct_info, err_data_buffer); + if (capabilities) + *capabilities= iprv.v0; + if (resources) + *resources= iprv.v1; + return iprv.status; +} + +/* Inform PALE_CHECK whether a machine check is expected so that PALE_CHECK willnot + * attempt to correct any expected machine checks. + */ +static inline s64 +ia64_pal_mc_expected (u64 expected, u64 *previous) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_EXPECTED, expected, 0, 0); + if (previous) + *previous = iprv.v0; + return iprv.status; +} + +typedef union pal_hw_tracking_u { + u64 pht_data; + struct { + u64 itc :4, /* Instruction cache tracking */ + dct :4, /* Date cache tracking */ + itt :4, /* Instruction TLB tracking */ + ddt :4, /* Data TLB tracking */ + reserved:48; + } pal_hw_tracking_s; +} pal_hw_tracking_u_t; + +/* + * Hardware tracking status. + */ +static inline s64 +ia64_pal_mc_hw_tracking (u64 *status) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_HW_TRACKING, 0, 0, 0); + if (status) + *status = iprv.v0; + return iprv.status; +} + +/* Register a platform dependent location with PAL to which it can save + * minimal processor state in the event of a machine check or initialization + * event. + */ +static inline s64 +ia64_pal_mc_register_mem (u64 physical_addr, u64 size, u64 *req_size) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_REGISTER_MEM, physical_addr, size, 0); + if (req_size) + *req_size = iprv.v0; + return iprv.status; +} + +/* Restore minimal architectural processor state, set CMC interrupt if necessary + * and resume execution + */ +static inline s64 +ia64_pal_mc_resume (u64 set_cmci, u64 save_ptr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MC_RESUME, set_cmci, save_ptr, 0); + return iprv.status; +} + +/* Return the memory attributes implemented by the processor */ +static inline s64 +ia64_pal_mem_attrib (u64 *mem_attrib) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MEM_ATTRIB, 0, 0, 0); + if (mem_attrib) + *mem_attrib = iprv.v0 & 0xff; + return iprv.status; +} + +/* Return the amount of memory needed for second phase of processor + * self-test and the required alignment of memory. + */ +static inline s64 +ia64_pal_mem_for_test (u64 *bytes_needed, u64 *alignment) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_MEM_FOR_TEST, 0, 0, 0); + if (bytes_needed) + *bytes_needed = iprv.v0; + if (alignment) + *alignment = iprv.v1; + return iprv.status; +} + +typedef union pal_perf_mon_info_u { + u64 ppmi_data; + struct { + u64 generic : 8, + width : 8, + cycles : 8, + retired : 8, + reserved : 32; + } pal_perf_mon_info_s; +} pal_perf_mon_info_u_t; + +/* Return the performance monitor information about what can be counted + * and how to configure the monitors to count the desired events. + */ +static inline s64 +ia64_pal_perf_mon_info (u64 *pm_buffer, pal_perf_mon_info_u_t *pm_info) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PERF_MON_INFO, (unsigned long) pm_buffer, 0, 0); + if (pm_info) + pm_info->ppmi_data = iprv.v0; + return iprv.status; +} + +/* Specifies the physical address of the processor interrupt block + * and I/O port space. + */ +static inline s64 +ia64_pal_platform_addr (u64 type, u64 physical_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PLATFORM_ADDR, type, physical_addr, 0); + return iprv.status; +} + +/* Set the SAL PMI entrypoint in memory */ +static inline s64 +ia64_pal_pmi_entrypoint (u64 sal_pmi_entry_addr) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PMI_ENTRYPOINT, sal_pmi_entry_addr, 0, 0); + return iprv.status; +} + +struct pal_features_s; +/* Provide information about configurable processor features */ +static inline s64 +ia64_pal_proc_get_features (u64 *features_avail, + u64 *features_status, + u64 *features_control, + u64 features_set) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, features_set, 0); + if (iprv.status == 0) { + *features_avail = iprv.v0; + *features_status = iprv.v1; + *features_control = iprv.v2; + } + return iprv.status; +} + +/* Enable/disable processor dependent features */ +static inline s64 +ia64_pal_proc_set_features (u64 feature_select) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, feature_select, 0, 0); + return iprv.status; +} + +/* + * Put everything in a struct so we avoid the global offset table whenever + * possible. + */ +typedef struct ia64_ptce_info_s { + unsigned long base; + u32 count[2]; + u32 stride[2]; +} ia64_ptce_info_t; + +/* Return the information required for the architected loop used to purge + * (initialize) the entire TC + */ +static inline s64 +ia64_get_ptce (ia64_ptce_info_t *ptce) +{ + struct ia64_pal_retval iprv; + + if (!ptce) + return -1; + + PAL_CALL(iprv, PAL_PTCE_INFO, 0, 0, 0); + if (iprv.status == 0) { + ptce->base = iprv.v0; + ptce->count[0] = iprv.v1 >> 32; + ptce->count[1] = iprv.v1 & 0xffffffff; + ptce->stride[0] = iprv.v2 >> 32; + ptce->stride[1] = iprv.v2 & 0xffffffff; + } + return iprv.status; +} + +/* Return info about implemented application and control registers. */ +static inline s64 +ia64_pal_register_info (u64 info_request, u64 *reg_info_1, u64 *reg_info_2) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_REGISTER_INFO, info_request, 0, 0); + if (reg_info_1) + *reg_info_1 = iprv.v0; + if (reg_info_2) + *reg_info_2 = iprv.v1; + return iprv.status; +} + +typedef union pal_hints_u { + unsigned long ph_data; + struct { + unsigned long si : 1, + li : 1, + reserved : 62; + } pal_hints_s; +} pal_hints_u_t; + +/* Return information about the register stack and RSE for this processor + * implementation. + */ +static inline long ia64_pal_rse_info(unsigned long *num_phys_stacked, + pal_hints_u_t *hints) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_RSE_INFO, 0, 0, 0); + if (num_phys_stacked) + *num_phys_stacked = iprv.v0; + if (hints) + hints->ph_data = iprv.v1; + return iprv.status; +} + +/* + * Set the current hardware resource sharing policy of the processor + */ +static inline s64 +ia64_pal_set_hw_policy (u64 policy) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_SET_HW_POLICY, policy, 0, 0); + return iprv.status; +} + +/* Cause the processor to enter SHUTDOWN state, where prefetching and execution are + * suspended, but cause cache and TLB coherency to be maintained. + * This is usually called in IA-32 mode. + */ +static inline s64 +ia64_pal_shutdown (void) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_SHUTDOWN, 0, 0, 0); + return iprv.status; +} + +/* Perform the second phase of processor self-test. */ +static inline s64 +ia64_pal_test_proc (u64 test_addr, u64 test_size, u64 attributes, u64 *self_test_state) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_TEST_PROC, test_addr, test_size, attributes); + if (self_test_state) + *self_test_state = iprv.v0; + return iprv.status; +} + +typedef union pal_version_u { + u64 pal_version_val; + struct { + u64 pv_pal_b_rev : 8; + u64 pv_pal_b_model : 8; + u64 pv_reserved1 : 8; + u64 pv_pal_vendor : 8; + u64 pv_pal_a_rev : 8; + u64 pv_pal_a_model : 8; + u64 pv_reserved2 : 16; + } pal_version_s; +} pal_version_u_t; + + +/* + * Return PAL version information. While the documentation states that + * PAL_VERSION can be called in either physical or virtual mode, some + * implementations only allow physical calls. We don't call it very often, + * so the overhead isn't worth eliminating. + */ +static inline s64 +ia64_pal_version (pal_version_u_t *pal_min_version, pal_version_u_t *pal_cur_version) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS(iprv, PAL_VERSION, 0, 0, 0); + if (pal_min_version) + pal_min_version->pal_version_val = iprv.v0; + + if (pal_cur_version) + pal_cur_version->pal_version_val = iprv.v1; + + return iprv.status; +} + +typedef union pal_tc_info_u { + u64 pti_val; + struct { + u64 num_sets : 8, + associativity : 8, + num_entries : 16, + pf : 1, + unified : 1, + reduce_tr : 1, + reserved : 29; + } pal_tc_info_s; +} pal_tc_info_u_t; + +#define tc_reduce_tr pal_tc_info_s.reduce_tr +#define tc_unified pal_tc_info_s.unified +#define tc_pf pal_tc_info_s.pf +#define tc_num_entries pal_tc_info_s.num_entries +#define tc_associativity pal_tc_info_s.associativity +#define tc_num_sets pal_tc_info_s.num_sets + + +/* Return information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 +ia64_pal_vm_info (u64 tc_level, u64 tc_type, pal_tc_info_u_t *tc_info, u64 *tc_pages) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_INFO, tc_level, tc_type, 0); + if (tc_info) + tc_info->pti_val = iprv.v0; + if (tc_pages) + *tc_pages = iprv.v1; + return iprv.status; +} + +/* Get page size information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 ia64_pal_vm_page_size(u64 *tr_pages, u64 *vw_pages) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_PAGE_SIZE, 0, 0, 0); + if (tr_pages) + *tr_pages = iprv.v0; + if (vw_pages) + *vw_pages = iprv.v1; + return iprv.status; +} + +typedef union pal_vm_info_1_u { + u64 pvi1_val; + struct { + u64 vw : 1, + phys_add_size : 7, + key_size : 8, + max_pkr : 8, + hash_tag_id : 8, + max_dtr_entry : 8, + max_itr_entry : 8, + max_unique_tcs : 8, + num_tc_levels : 8; + } pal_vm_info_1_s; +} pal_vm_info_1_u_t; + +#define PAL_MAX_PURGES 0xFFFF /* all ones is means unlimited */ + +typedef union pal_vm_info_2_u { + u64 pvi2_val; + struct { + u64 impl_va_msb : 8, + rid_size : 8, + max_purges : 16, + reserved : 32; + } pal_vm_info_2_s; +} pal_vm_info_2_u_t; + +/* Get summary information about the virtual memory characteristics of the processor + * implementation. + */ +static inline s64 +ia64_pal_vm_summary (pal_vm_info_1_u_t *vm_info_1, pal_vm_info_2_u_t *vm_info_2) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VM_SUMMARY, 0, 0, 0); + if (vm_info_1) + vm_info_1->pvi1_val = iprv.v0; + if (vm_info_2) + vm_info_2->pvi2_val = iprv.v1; + return iprv.status; +} + +typedef union pal_vp_info_u { + u64 pvi_val; + struct { + u64 index: 48, /* virtual feature set info */ + vmm_id: 16; /* feature set id */ + } pal_vp_info_s; +} pal_vp_info_u_t; + +/* + * Returns information about virtual processor features + */ +static inline s64 +ia64_pal_vp_info (u64 feature_set, u64 vp_buffer, u64 *vp_info, u64 *vmm_id) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_VP_INFO, feature_set, vp_buffer, 0); + if (vp_info) + *vp_info = iprv.v0; + if (vmm_id) + *vmm_id = iprv.v1; + return iprv.status; +} + +typedef union pal_itr_valid_u { + u64 piv_val; + struct { + u64 access_rights_valid : 1, + priv_level_valid : 1, + dirty_bit_valid : 1, + mem_attr_valid : 1, + reserved : 60; + } pal_tr_valid_s; +} pal_tr_valid_u_t; + +/* Read a translation register */ +static inline s64 +ia64_pal_tr_read (u64 reg_num, u64 tr_type, u64 *tr_buffer, pal_tr_valid_u_t *tr_valid) +{ + struct ia64_pal_retval iprv; + PAL_CALL_PHYS_STK(iprv, PAL_VM_TR_READ, reg_num, tr_type,(u64)ia64_tpa(tr_buffer)); + if (tr_valid) + tr_valid->piv_val = iprv.v0; + return iprv.status; +} + +/* + * PAL_PREFETCH_VISIBILITY transaction types + */ +#define PAL_VISIBILITY_VIRTUAL 0 +#define PAL_VISIBILITY_PHYSICAL 1 + +/* + * PAL_PREFETCH_VISIBILITY return codes + */ +#define PAL_VISIBILITY_OK 1 +#define PAL_VISIBILITY_OK_REMOTE_NEEDED 0 +#define PAL_VISIBILITY_INVAL_ARG -2 +#define PAL_VISIBILITY_ERROR -3 + +static inline s64 +ia64_pal_prefetch_visibility (s64 trans_type) +{ + struct ia64_pal_retval iprv; + PAL_CALL(iprv, PAL_PREFETCH_VISIBILITY, trans_type, 0, 0); + return iprv.status; +} + +/* data structure for getting information on logical to physical mappings */ +typedef union pal_log_overview_u { + struct { + u64 num_log :16, /* Total number of logical + * processors on this die + */ + tpc :8, /* Threads per core */ + reserved3 :8, /* Reserved */ + cpp :8, /* Cores per processor */ + reserved2 :8, /* Reserved */ + ppid :8, /* Physical processor ID */ + reserved1 :8; /* Reserved */ + } overview_bits; + u64 overview_data; +} pal_log_overview_t; + +typedef union pal_proc_n_log_info1_u{ + struct { + u64 tid :16, /* Thread id */ + reserved2 :16, /* Reserved */ + cid :16, /* Core id */ + reserved1 :16; /* Reserved */ + } ppli1_bits; + u64 ppli1_data; +} pal_proc_n_log_info1_t; + +typedef union pal_proc_n_log_info2_u { + struct { + u64 la :16, /* Logical address */ + reserved :48; /* Reserved */ + } ppli2_bits; + u64 ppli2_data; +} pal_proc_n_log_info2_t; + +typedef struct pal_logical_to_physical_s +{ + pal_log_overview_t overview; + pal_proc_n_log_info1_t ppli1; + pal_proc_n_log_info2_t ppli2; +} pal_logical_to_physical_t; + +#define overview_num_log overview.overview_bits.num_log +#define overview_tpc overview.overview_bits.tpc +#define overview_cpp overview.overview_bits.cpp +#define overview_ppid overview.overview_bits.ppid +#define log1_tid ppli1.ppli1_bits.tid +#define log1_cid ppli1.ppli1_bits.cid +#define log2_la ppli2.ppli2_bits.la + +/* Get information on logical to physical processor mappings. */ +static inline s64 +ia64_pal_logical_to_phys(u64 proc_number, pal_logical_to_physical_t *mapping) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_LOGICAL_TO_PHYSICAL, proc_number, 0, 0); + + if (iprv.status == PAL_STATUS_SUCCESS) + { + mapping->overview.overview_data = iprv.v0; + mapping->ppli1.ppli1_data = iprv.v1; + mapping->ppli2.ppli2_data = iprv.v2; + } + + return iprv.status; +} + +typedef struct pal_cache_shared_info_s +{ + u64 num_shared; + pal_proc_n_log_info1_t ppli1; + pal_proc_n_log_info2_t ppli2; +} pal_cache_shared_info_t; + +/* Get information on logical to physical processor mappings. */ +static inline s64 +ia64_pal_cache_shared_info(u64 level, + u64 type, + u64 proc_number, + pal_cache_shared_info_t *info) +{ + struct ia64_pal_retval iprv; + + PAL_CALL(iprv, PAL_CACHE_SHARED_INFO, level, type, proc_number); + + if (iprv.status == PAL_STATUS_SUCCESS) { + info->num_shared = iprv.v0; + info->ppli1.ppli1_data = iprv.v1; + info->ppli2.ppli2_data = iprv.v2; + } + + return iprv.status; +} +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_PAL_H */ diff --git a/arch/ia64/include/asm/param.h b/arch/ia64/include/asm/param.h new file mode 100644 index 000000000000..f0b786227c40 --- /dev/null +++ b/arch/ia64/include/asm/param.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Fundamental kernel parameters. + * + * Based on . + * + * Modified 1998, 1999, 2002-2003 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _ASM_IA64_PARAM_H +#define _ASM_IA64_PARAM_H + +#include + +# define HZ CONFIG_HZ +# define USER_HZ HZ +# define CLOCKS_PER_SEC HZ /* frequency at which times() counts */ +#endif /* _ASM_IA64_PARAM_H */ diff --git a/arch/ia64/include/asm/parport.h b/arch/ia64/include/asm/parport.h new file mode 100644 index 000000000000..360ca9bf2f6f --- /dev/null +++ b/arch/ia64/include/asm/parport.h @@ -0,0 +1,20 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * parport.h: platform-specific PC-style parport initialisation + * + * Copyright (C) 1999, 2000 Tim Waugh + * + * This file should only be included by drivers/parport/parport_pc.c. + */ + +#ifndef _ASM_IA64_PARPORT_H +#define _ASM_IA64_PARPORT_H 1 + +static int parport_pc_find_isa_ports(int autoirq, int autodma); + +static int parport_pc_find_nonpci_ports(int autoirq, int autodma) +{ + return parport_pc_find_isa_ports(autoirq, autodma); +} + +#endif /* _ASM_IA64_PARPORT_H */ diff --git a/arch/ia64/include/asm/patch.h b/arch/ia64/include/asm/patch.h new file mode 100644 index 000000000000..bd487ed22bf5 --- /dev/null +++ b/arch/ia64/include/asm/patch.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PATCH_H +#define _ASM_IA64_PATCH_H + +/* + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * There are a number of reasons for patching instructions. Rather than duplicating code + * all over the place, we put the common stuff here. Reasons for patching: in-kernel + * module-loader, virtual-to-physical patch-list, McKinley Errata 9 workaround, and gate + * shared library. Undoubtedly, some of these reasons will disappear and others will + * be added over time. + */ +#include +#include + +extern void ia64_patch (u64 insn_addr, u64 mask, u64 val); /* patch any insn slot */ +extern void ia64_patch_imm64 (u64 insn_addr, u64 val); /* patch "movl" w/abs. value*/ +extern void ia64_patch_imm60 (u64 insn_addr, u64 val); /* patch "brl" w/ip-rel value */ + +extern void ia64_patch_mckinley_e9 (unsigned long start, unsigned long end); +extern void ia64_patch_vtop (unsigned long start, unsigned long end); +extern void ia64_patch_phys_stack_reg(unsigned long val); +extern void ia64_patch_rse (unsigned long start, unsigned long end); +extern void ia64_patch_gate (void); + +#endif /* _ASM_IA64_PATCH_H */ diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h new file mode 100644 index 000000000000..fa8f545c24c9 --- /dev/null +++ b/arch/ia64/include/asm/pci.h @@ -0,0 +1,66 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PCI_H +#define _ASM_IA64_PCI_H + +#include +#include +#include +#include +#include +#include + +#include +#include + +struct pci_vector_struct { + __u16 segment; /* PCI Segment number */ + __u16 bus; /* PCI Bus number */ + __u32 pci_id; /* ACPI split 16 bits device, 16 bits function (see section 6.1.1) */ + __u8 pin; /* PCI PIN (0 = A, 1 = B, 2 = C, 3 = D) */ + __u32 irq; /* IRQ assigned */ +}; + +/* + * Can be used to override the logic in pci_scan_bus for skipping already-configured bus + * numbers - to be used for buggy BIOSes or architectures with incomplete PCI setup by the + * loader. + */ +#define pcibios_assign_all_busses() 0 + +#define PCIBIOS_MIN_IO 0x1000 +#define PCIBIOS_MIN_MEM 0x10000000 + +#define HAVE_PCI_MMAP +#define ARCH_GENERIC_PCI_MMAP_RESOURCE +#define arch_can_pci_mmap_wc() 1 + +#define HAVE_PCI_LEGACY +extern int pci_mmap_legacy_page_range(struct pci_bus *bus, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state); + +char *pci_get_legacy_mem(struct pci_bus *bus); +int pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size); +int pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size); + +struct pci_controller { + struct acpi_device *companion; + void *iommu; + int segment; + int node; /* nearest node with memory or NUMA_NO_NODE for global allocation */ + + void *platform_data; +}; + + +#define PCI_CONTROLLER(busdev) ((struct pci_controller *) busdev->sysdata) +#define pci_domain_nr(busdev) (PCI_CONTROLLER(busdev)->segment) + +extern struct pci_ops pci_root_ops; + +static inline int pci_proc_domain(struct pci_bus *bus) +{ + return (pci_domain_nr(bus) != 0); +} + +#endif /* _ASM_IA64_PCI_H */ diff --git a/arch/ia64/include/asm/percpu.h b/arch/ia64/include/asm/percpu.h new file mode 100644 index 000000000000..f357b9bb3576 --- /dev/null +++ b/arch/ia64/include/asm/percpu.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PERCPU_H +#define _ASM_IA64_PERCPU_H + +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#ifdef __ASSEMBLY__ +# define THIS_CPU(var) (var) /* use this to mark accesses to per-CPU variables... */ +#else /* !__ASSEMBLY__ */ + + +#include + +#ifdef CONFIG_SMP + +#ifdef HAVE_MODEL_SMALL_ATTRIBUTE +# define PER_CPU_ATTRIBUTES __attribute__((__model__ (__small__))) +#endif + +#define __my_cpu_offset __ia64_per_cpu_var(local_per_cpu_offset) + +extern void *per_cpu_init(void); + +#else /* ! SMP */ + +#define per_cpu_init() (__phys_per_cpu_start) + +#endif /* SMP */ + +#define PER_CPU_BASE_SECTION ".data..percpu" + +/* + * Be extremely careful when taking the address of this variable! Due to virtual + * remapping, it is different from the canonical address returned by this_cpu_ptr(&var)! + * On the positive side, using __ia64_per_cpu_var() instead of this_cpu_ptr() is slightly + * more efficient. + */ +#define __ia64_per_cpu_var(var) (*({ \ + __verify_pcpu_ptr(&(var)); \ + ((typeof(var) __kernel __force *)&(var)); \ +})) + +#include + +/* Equal to __per_cpu_offset[smp_processor_id()], but faster to access: */ +DECLARE_PER_CPU(unsigned long, local_per_cpu_offset); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_PERCPU_H */ diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h new file mode 100644 index 000000000000..0fb2b6291d58 --- /dev/null +++ b/arch/ia64/include/asm/pgalloc.h @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PGALLOC_H +#define _ASM_IA64_PGALLOC_H + +/* + * This file contains the functions and defines necessary to allocate + * page tables. + * + * This hopefully works with any (fixed) ia-64 page-size, as defined + * in (currently 8192). + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2000, Goutham Rao + */ + + +#include +#include +#include +#include + +#include + +#include + +static inline pgd_t *pgd_alloc(struct mm_struct *mm) +{ + return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); +} + +#if CONFIG_PGTABLE_LEVELS == 4 +static inline void +p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud) +{ + p4d_val(*p4d_entry) = __pa(pud); +} + +#define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) +#endif /* CONFIG_PGTABLE_LEVELS == 4 */ + +static inline void +pud_populate(struct mm_struct *mm, pud_t * pud_entry, pmd_t * pmd) +{ + pud_val(*pud_entry) = __pa(pmd); +} + +#define __pmd_free_tlb(tlb, pmd, address) pmd_free((tlb)->mm, pmd) + +static inline void +pmd_populate(struct mm_struct *mm, pmd_t * pmd_entry, pgtable_t pte) +{ + pmd_val(*pmd_entry) = page_to_phys(pte); +} + +static inline void +pmd_populate_kernel(struct mm_struct *mm, pmd_t * pmd_entry, pte_t * pte) +{ + pmd_val(*pmd_entry) = __pa(pte); +} + +#define __pte_free_tlb(tlb, pte, address) pte_free((tlb)->mm, pte) + +#endif /* _ASM_IA64_PGALLOC_H */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h new file mode 100644 index 000000000000..9be2d2ba6016 --- /dev/null +++ b/arch/ia64/include/asm/pgtable.h @@ -0,0 +1,545 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PGTABLE_H +#define _ASM_IA64_PGTABLE_H + +/* + * This file contains the functions and defines necessary to modify and use + * the IA-64 page table tree. + * + * This hopefully works with any (fixed) IA-64 page-size, as defined + * in . + * + * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include +#include +#include +#include + +#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */ + +/* + * First, define the various bits in a PTE. Note that the PTE format + * matches the VHPT short format, the firt doubleword of the VHPD long + * format, and the first doubleword of the TLB insertion format. + */ +#define _PAGE_P_BIT 0 +#define _PAGE_A_BIT 5 +#define _PAGE_D_BIT 6 + +#define _PAGE_P (1 << _PAGE_P_BIT) /* page present bit */ +#define _PAGE_MA_WB (0x0 << 2) /* write back memory attribute */ +#define _PAGE_MA_UC (0x4 << 2) /* uncacheable memory attribute */ +#define _PAGE_MA_UCE (0x5 << 2) /* UC exported attribute */ +#define _PAGE_MA_WC (0x6 << 2) /* write coalescing memory attribute */ +#define _PAGE_MA_NAT (0x7 << 2) /* not-a-thing attribute */ +#define _PAGE_MA_MASK (0x7 << 2) +#define _PAGE_PL_0 (0 << 7) /* privilege level 0 (kernel) */ +#define _PAGE_PL_1 (1 << 7) /* privilege level 1 (unused) */ +#define _PAGE_PL_2 (2 << 7) /* privilege level 2 (unused) */ +#define _PAGE_PL_3 (3 << 7) /* privilege level 3 (user) */ +#define _PAGE_PL_MASK (3 << 7) +#define _PAGE_AR_R (0 << 9) /* read only */ +#define _PAGE_AR_RX (1 << 9) /* read & execute */ +#define _PAGE_AR_RW (2 << 9) /* read & write */ +#define _PAGE_AR_RWX (3 << 9) /* read, write & execute */ +#define _PAGE_AR_R_RW (4 << 9) /* read / read & write */ +#define _PAGE_AR_RX_RWX (5 << 9) /* read & exec / read, write & exec */ +#define _PAGE_AR_RWX_RW (6 << 9) /* read, write & exec / read & write */ +#define _PAGE_AR_X_RX (7 << 9) /* exec & promote / read & exec */ +#define _PAGE_AR_MASK (7 << 9) +#define _PAGE_AR_SHIFT 9 +#define _PAGE_A (1 << _PAGE_A_BIT) /* page accessed bit */ +#define _PAGE_D (1 << _PAGE_D_BIT) /* page dirty bit */ +#define _PAGE_PPN_MASK (((__IA64_UL(1) << IA64_MAX_PHYS_BITS) - 1) & ~0xfffUL) +#define _PAGE_ED (__IA64_UL(1) << 52) /* exception deferral */ +#define _PAGE_PROTNONE (__IA64_UL(1) << 63) + +/* We borrow bit 7 to store the exclusive marker in swap PTEs. */ +#define _PAGE_SWP_EXCLUSIVE (1 << 7) + +#define _PFN_MASK _PAGE_PPN_MASK +/* Mask of bits which may be changed by pte_modify(); the odd bits are there for _PAGE_PROTNONE */ +#define _PAGE_CHG_MASK (_PAGE_P | _PAGE_PROTNONE | _PAGE_PL_MASK | _PAGE_AR_MASK | _PAGE_ED) + +#define _PAGE_SIZE_4K 12 +#define _PAGE_SIZE_8K 13 +#define _PAGE_SIZE_16K 14 +#define _PAGE_SIZE_64K 16 +#define _PAGE_SIZE_256K 18 +#define _PAGE_SIZE_1M 20 +#define _PAGE_SIZE_4M 22 +#define _PAGE_SIZE_16M 24 +#define _PAGE_SIZE_64M 26 +#define _PAGE_SIZE_256M 28 +#define _PAGE_SIZE_1G 30 +#define _PAGE_SIZE_4G 32 + +#define __ACCESS_BITS _PAGE_ED | _PAGE_A | _PAGE_P | _PAGE_MA_WB +#define __DIRTY_BITS_NO_ED _PAGE_A | _PAGE_P | _PAGE_D | _PAGE_MA_WB +#define __DIRTY_BITS _PAGE_ED | __DIRTY_BITS_NO_ED + +/* + * How many pointers will a page table level hold expressed in shift + */ +#define PTRS_PER_PTD_SHIFT (PAGE_SHIFT-3) + +/* + * Definitions for fourth level: + */ +#define PTRS_PER_PTE (__IA64_UL(1) << (PTRS_PER_PTD_SHIFT)) + +/* + * Definitions for third level: + * + * PMD_SHIFT determines the size of the area a third-level page table + * can map. + */ +#define PMD_SHIFT (PAGE_SHIFT + (PTRS_PER_PTD_SHIFT)) +#define PMD_SIZE (1UL << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD (1UL << (PTRS_PER_PTD_SHIFT)) + +#if CONFIG_PGTABLE_LEVELS == 4 +/* + * Definitions for second level: + * + * PUD_SHIFT determines the size of the area a second-level page table + * can map. + */ +#define PUD_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PTRS_PER_PUD (1UL << (PTRS_PER_PTD_SHIFT)) +#endif + +/* + * Definitions for first level: + * + * PGDIR_SHIFT determines what a first-level page table entry can map. + */ +#if CONFIG_PGTABLE_LEVELS == 4 +#define PGDIR_SHIFT (PUD_SHIFT + (PTRS_PER_PTD_SHIFT)) +#else +#define PGDIR_SHIFT (PMD_SHIFT + (PTRS_PER_PTD_SHIFT)) +#endif +#define PGDIR_SIZE (__IA64_UL(1) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD_SHIFT PTRS_PER_PTD_SHIFT +#define PTRS_PER_PGD (1UL << PTRS_PER_PGD_SHIFT) +#define USER_PTRS_PER_PGD (5*PTRS_PER_PGD/8) /* regions 0-4 are user regions */ + +/* + * All the normal masks have the "page accessed" bits on, as any time + * they are used, the page is accessed. They are cleared only by the + * page-out routines. + */ +#define PAGE_NONE __pgprot(_PAGE_PROTNONE | _PAGE_A) +#define PAGE_SHARED __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RW) +#define PAGE_READONLY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#define PAGE_COPY __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_R) +#define PAGE_COPY_EXEC __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX) +#define PAGE_GATE __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_X_RX) +#define PAGE_KERNEL __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX) +#define PAGE_KERNELRX __pgprot(__ACCESS_BITS | _PAGE_PL_0 | _PAGE_AR_RX) +#define PAGE_KERNEL_UC __pgprot(__DIRTY_BITS | _PAGE_PL_0 | _PAGE_AR_RWX | \ + _PAGE_MA_UC) + +# ifndef __ASSEMBLY__ + +#include /* for mm_struct */ +#include +#include +#include + +/* + * Next come the mappings that determine how mmap() protection bits + * (PROT_EXEC, PROT_READ, PROT_WRITE, PROT_NONE) get implemented. The + * _P version gets used for a private shared memory segment, the _S + * version gets used for a shared memory segment with MAP_SHARED on. + * In a private shared memory segment, we do a copy-on-write if a task + * attempts to write to the page. + */ + /* xwr */ +#define pgd_ERROR(e) printk("%s:%d: bad pgd %016lx.\n", __FILE__, __LINE__, pgd_val(e)) +#if CONFIG_PGTABLE_LEVELS == 4 +#define pud_ERROR(e) printk("%s:%d: bad pud %016lx.\n", __FILE__, __LINE__, pud_val(e)) +#endif +#define pmd_ERROR(e) printk("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pte_ERROR(e) printk("%s:%d: bad pte %016lx.\n", __FILE__, __LINE__, pte_val(e)) + + +/* + * Some definitions to translate between mem_map, PTEs, and page addresses: + */ + + +/* Quick test to see if ADDR is a (potentially) valid physical address. */ +static inline long +ia64_phys_addr_valid (unsigned long addr) +{ + return (addr & (local_cpu_data->unimpl_pa_mask)) == 0; +} + +/* + * Now come the defines and routines to manage and access the three-level + * page table. + */ + + +#define VMALLOC_START (RGN_BASE(RGN_GATE) + 0x200000000UL) +#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_SPARSEMEM_VMEMMAP) +/* SPARSEMEM_VMEMMAP uses half of vmalloc... */ +# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 10))) +# define vmemmap ((struct page *)VMALLOC_END) +#else +# define VMALLOC_END (RGN_BASE(RGN_GATE) + (1UL << (4*PAGE_SHIFT - 9))) +#endif + +/* fs/proc/kcore.c */ +#define kc_vaddr_to_offset(v) ((v) - RGN_BASE(RGN_GATE)) +#define kc_offset_to_vaddr(o) ((o) + RGN_BASE(RGN_GATE)) + +#define RGN_MAP_SHIFT (PGDIR_SHIFT + PTRS_PER_PGD_SHIFT - 3) +#define RGN_MAP_LIMIT ((1UL << RGN_MAP_SHIFT) - PAGE_SIZE) /* per region addr limit */ + +#define PFN_PTE_SHIFT PAGE_SHIFT +/* + * Conversion functions: convert page frame number (pfn) and a protection value to a page + * table entry (pte). + */ +#define pfn_pte(pfn, pgprot) \ +({ pte_t __pte; pte_val(__pte) = ((pfn) << PAGE_SHIFT) | pgprot_val(pgprot); __pte; }) + +/* Extract pfn from pte. */ +#define pte_pfn(_pte) ((pte_val(_pte) & _PFN_MASK) >> PAGE_SHIFT) + +#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot)) + +/* This takes a physical page address that is used by the remapping functions */ +#define mk_pte_phys(physpage, pgprot) \ +({ pte_t __pte; pte_val(__pte) = physpage + pgprot_val(pgprot); __pte; }) + +#define pte_modify(_pte, newprot) \ + (__pte((pte_val(_pte) & ~_PAGE_CHG_MASK) | (pgprot_val(newprot) & _PAGE_CHG_MASK))) + +#define pte_none(pte) (!pte_val(pte)) +#define pte_present(pte) (pte_val(pte) & (_PAGE_P | _PAGE_PROTNONE)) +#define pte_clear(mm,addr,pte) (pte_val(*(pte)) = 0UL) +/* pte_page() returns the "struct page *" corresponding to the PTE: */ +#define pte_page(pte) virt_to_page(((pte_val(pte) & _PFN_MASK) + PAGE_OFFSET)) + +#define pmd_none(pmd) (!pmd_val(pmd)) +#define pmd_bad(pmd) (!ia64_phys_addr_valid(pmd_val(pmd))) +#define pmd_present(pmd) (pmd_val(pmd) != 0UL) +#define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0UL) +#define pmd_page_vaddr(pmd) ((unsigned long) __va(pmd_val(pmd) & _PFN_MASK)) +#define pmd_pfn(pmd) ((pmd_val(pmd) & _PFN_MASK) >> PAGE_SHIFT) +#define pmd_page(pmd) virt_to_page((pmd_val(pmd) + PAGE_OFFSET)) + +#define pud_none(pud) (!pud_val(pud)) +#define pud_bad(pud) (!ia64_phys_addr_valid(pud_val(pud))) +#define pud_present(pud) (pud_val(pud) != 0UL) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) +#define pud_pgtable(pud) ((pmd_t *) __va(pud_val(pud) & _PFN_MASK)) +#define pud_page(pud) virt_to_page((pud_val(pud) + PAGE_OFFSET)) + +#if CONFIG_PGTABLE_LEVELS == 4 +#define p4d_none(p4d) (!p4d_val(p4d)) +#define p4d_bad(p4d) (!ia64_phys_addr_valid(p4d_val(p4d))) +#define p4d_present(p4d) (p4d_val(p4d) != 0UL) +#define p4d_clear(p4dp) (p4d_val(*(p4dp)) = 0UL) +#define p4d_pgtable(p4d) ((pud_t *) __va(p4d_val(p4d) & _PFN_MASK)) +#define p4d_page(p4d) virt_to_page((p4d_val(p4d) + PAGE_OFFSET)) +#endif + +/* + * The following have defined behavior only work if pte_present() is true. + */ +#define pte_write(pte) ((unsigned) (((pte_val(pte) & _PAGE_AR_MASK) >> _PAGE_AR_SHIFT) - 2) <= 4) +#define pte_exec(pte) ((pte_val(pte) & _PAGE_AR_RX) != 0) +#define pte_dirty(pte) ((pte_val(pte) & _PAGE_D) != 0) +#define pte_young(pte) ((pte_val(pte) & _PAGE_A) != 0) + +/* + * Note: we convert AR_RWX to AR_RX and AR_RW to AR_R by clearing the 2nd bit in the + * access rights: + */ +#define pte_wrprotect(pte) (__pte(pte_val(pte) & ~_PAGE_AR_RW)) +#define pte_mkwrite_novma(pte) (__pte(pte_val(pte) | _PAGE_AR_RW)) +#define pte_mkold(pte) (__pte(pte_val(pte) & ~_PAGE_A)) +#define pte_mkyoung(pte) (__pte(pte_val(pte) | _PAGE_A)) +#define pte_mkclean(pte) (__pte(pte_val(pte) & ~_PAGE_D)) +#define pte_mkdirty(pte) (__pte(pte_val(pte) | _PAGE_D)) +#define pte_mkhuge(pte) (__pte(pte_val(pte))) + +/* + * Because ia64's Icache and Dcache is not coherent (on a cpu), we need to + * sync icache and dcache when we insert *new* executable page. + * __ia64_sync_icache_dcache() check Pg_arch_1 bit and flush icache + * if necessary. + * + * set_pte() is also called by the kernel, but we can expect that the kernel + * flushes icache explicitly if necessary. + */ +#define pte_present_exec_user(pte)\ + ((pte_val(pte) & (_PAGE_P | _PAGE_PL_MASK | _PAGE_AR_RX)) == \ + (_PAGE_P | _PAGE_PL_3 | _PAGE_AR_RX)) + +extern void __ia64_sync_icache_dcache(pte_t pteval); +static inline void set_pte(pte_t *ptep, pte_t pteval) +{ + /* page is present && page is user && page is executable + * && (page swapin or new page or page migration + * || copy_on_write with page copying.) + */ + if (pte_present_exec_user(pteval) && + (!pte_present(*ptep) || + pte_pfn(*ptep) != pte_pfn(pteval))) + /* load_module() calles flush_icache_range() explicitly*/ + __ia64_sync_icache_dcache(pteval); + *ptep = pteval; +} + +/* + * Make page protection values cacheable, uncacheable, or write- + * combining. Note that "protection" is really a misnomer here as the + * protection value contains the memory attribute bits, dirty bits, and + * various other bits as well. + */ +#define pgprot_cacheable(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WB) +#define pgprot_noncached(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_UC) +#define pgprot_writecombine(prot) __pgprot((pgprot_val(prot) & ~_PAGE_MA_MASK) | _PAGE_MA_WC) + +struct file; +extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, + unsigned long size, pgprot_t vma_prot); +#define __HAVE_PHYS_MEM_ACCESS_PROT + +static inline unsigned long +pgd_index (unsigned long address) +{ + unsigned long region = address >> 61; + unsigned long l1index = (address >> PGDIR_SHIFT) & ((PTRS_PER_PGD >> 3) - 1); + + return (region << (PAGE_SHIFT - 6)) | l1index; +} +#define pgd_index pgd_index + +/* + * In the kernel's mapped region we know everything is in region number 5, so + * as an optimisation its PGD already points to the area for that region. + * However, this also means that we cannot use pgd_index() and we must + * never add the region here. + */ +#define pgd_offset_k(addr) \ + (init_mm.pgd + (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))) + +/* Look up a pgd entry in the gate area. On IA-64, the gate-area + resides in the kernel-mapped segment, hence we use pgd_offset_k() + here. */ +#define pgd_offset_gate(mm, addr) pgd_offset_k(addr) + +/* atomic versions of the some PTE manipulations: */ + +static inline int +ptep_test_and_clear_young (struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + if (!pte_young(*ptep)) + return 0; + return test_and_clear_bit(_PAGE_A_BIT, ptep); +#else + pte_t pte = *ptep; + if (!pte_young(pte)) + return 0; + set_pte_at(vma->vm_mm, addr, ptep, pte_mkold(pte)); + return 1; +#endif +} + +static inline pte_t +ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + return __pte(xchg((long *) ptep, 0)); +#else + pte_t pte = *ptep; + pte_clear(mm, addr, ptep); + return pte; +#endif +} + +static inline void +ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) +{ +#ifdef CONFIG_SMP + unsigned long new, old; + + do { + old = pte_val(*ptep); + new = pte_val(pte_wrprotect(__pte (old))); + } while (cmpxchg((unsigned long *) ptep, old, new) != old); +#else + pte_t old_pte = *ptep; + set_pte_at(mm, addr, ptep, pte_wrprotect(old_pte)); +#endif +} + +static inline int +pte_same (pte_t a, pte_t b) +{ + return pte_val(a) == pte_val(b); +} + +#define update_mmu_cache_range(vmf, vma, address, ptep, nr) do { } while (0) +#define update_mmu_cache(vma, address, ptep) do { } while (0) + +extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; +extern void paging_init (void); + +/* + * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that + * are !pte_none() && !pte_present(). + * + * Note: The macros below rely on the fact that MAX_SWAPFILES_SHIFT <= number of + * bits in the swap-type field of the swap pte. It would be nice to + * enforce that, but we can't easily include here. + * (Of course, better still would be to define MAX_SWAPFILES_SHIFT here...). + * + * Format of swap pte: + * bit 0 : present bit (must be zero) + * bits 1- 6: swap type + * bit 7 : exclusive marker + * bits 8-62: swap offset + * bit 63 : _PAGE_PROTNONE bit + */ +#define __swp_type(entry) (((entry).val >> 1) & 0x3f) +#define __swp_offset(entry) (((entry).val << 1) >> 9) +#define __swp_entry(type, offset) ((swp_entry_t) { ((type & 0x3f) << 1) | \ + ((long) (offset) << 8) }) +#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) }) +#define __swp_entry_to_pte(x) ((pte_t) { (x).val }) + +static inline int pte_swp_exclusive(pte_t pte) +{ + return pte_val(pte) & _PAGE_SWP_EXCLUSIVE; +} + +static inline pte_t pte_swp_mkexclusive(pte_t pte) +{ + pte_val(pte) |= _PAGE_SWP_EXCLUSIVE; + return pte; +} + +static inline pte_t pte_swp_clear_exclusive(pte_t pte) +{ + pte_val(pte) &= ~_PAGE_SWP_EXCLUSIVE; + return pte; +} + +/* + * ZERO_PAGE is a global shared page that is always zero: used + * for zero-mapped memory areas etc.. + */ +extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; +extern struct page *zero_page_memmap_ptr; +#define ZERO_PAGE(vaddr) (zero_page_memmap_ptr) + +/* We provide our own get_unmapped_area to cope with VA holes for userland */ +#define HAVE_ARCH_UNMAPPED_AREA + +#ifdef CONFIG_HUGETLB_PAGE +#define HUGETLB_PGDIR_SHIFT (HPAGE_SHIFT + 2*(PAGE_SHIFT-3)) +#define HUGETLB_PGDIR_SIZE (__IA64_UL(1) << HUGETLB_PGDIR_SHIFT) +#define HUGETLB_PGDIR_MASK (~(HUGETLB_PGDIR_SIZE-1)) +#endif + + +#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS +/* + * Update PTEP with ENTRY, which is guaranteed to be a less + * restrictive PTE. That is, ENTRY may have the ACCESSED, DIRTY, and + * WRITABLE bits turned on, when the value at PTEP did not. The + * WRITABLE bit may only be turned if SAFELY_WRITABLE is TRUE. + * + * SAFELY_WRITABLE is TRUE if we can update the value at PTEP without + * having to worry about races. On SMP machines, there are only two + * cases where this is true: + * + * (1) *PTEP has the PRESENT bit turned OFF + * (2) ENTRY has the DIRTY bit turned ON + * + * On ia64, we could implement this routine with a cmpxchg()-loop + * which ORs in the _PAGE_A/_PAGE_D bit if they're set in ENTRY. + * However, like on x86, we can get a more streamlined version by + * observing that it is OK to drop ACCESSED bit updates when + * SAFELY_WRITABLE is FALSE. Besides being rare, all that would do is + * result in an extra Access-bit fault, which would then turn on the + * ACCESSED bit in the low-level fault handler (iaccess_bit or + * daccess_bit in ivt.S). + */ +#ifdef CONFIG_SMP +# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ +({ \ + int __changed = !pte_same(*(__ptep), __entry); \ + if (__changed && __safely_writable) { \ + set_pte(__ptep, __entry); \ + flush_tlb_page(__vma, __addr); \ + } \ + __changed; \ +}) +#else +# define ptep_set_access_flags(__vma, __addr, __ptep, __entry, __safely_writable) \ +({ \ + int __changed = !pte_same(*(__ptep), __entry); \ + if (__changed) { \ + set_pte_at((__vma)->vm_mm, (__addr), __ptep, __entry); \ + flush_tlb_page(__vma, __addr); \ + } \ + __changed; \ +}) +#endif +# endif /* !__ASSEMBLY__ */ + +/* + * Identity-mapped regions use a large page size. We'll call such large pages + * "granules". If you can think of a better name that's unambiguous, let me + * know... + */ +#if defined(CONFIG_IA64_GRANULE_64MB) +# define IA64_GRANULE_SHIFT _PAGE_SIZE_64M +#elif defined(CONFIG_IA64_GRANULE_16MB) +# define IA64_GRANULE_SHIFT _PAGE_SIZE_16M +#endif +#define IA64_GRANULE_SIZE (1 << IA64_GRANULE_SHIFT) +/* + * log2() of the page size we use to map the kernel image (IA64_TR_KERNEL): + */ +#define KERNEL_TR_PAGE_SHIFT _PAGE_SIZE_64M +#define KERNEL_TR_PAGE_SIZE (1 << KERNEL_TR_PAGE_SHIFT) + +/* These tell get_user_pages() that the first gate page is accessible from user-level. */ +#define FIXADDR_USER_START GATE_ADDR +#ifdef HAVE_BUGGY_SEGREL +# define FIXADDR_USER_END (GATE_ADDR + 2*PAGE_SIZE) +#else +# define FIXADDR_USER_END (GATE_ADDR + 2*PERCPU_PAGE_SIZE) +#endif + +#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR +#define __HAVE_ARCH_PTEP_SET_WRPROTECT +#define __HAVE_ARCH_PTE_SAME +#define __HAVE_ARCH_PGD_OFFSET_GATE + + +#if CONFIG_PGTABLE_LEVELS == 3 +#include +#endif +#include + +#endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h new file mode 100644 index 000000000000..47e3801b526a --- /dev/null +++ b/arch/ia64/include/asm/processor.h @@ -0,0 +1,660 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_PROCESSOR_H +#define _ASM_IA64_PROCESSOR_H + +/* + * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + * + * 11/24/98 S.Eranian added ia64_set_iva() + * 12/03/99 D. Mosberger implement thread_saved_pc() via kernel unwind API + * 06/16/00 A. Mallick added csd/ssd/tssd for ia32 support + */ + + +#include +#include +#include +#include + +#define IA64_NUM_PHYS_STACK_REG 96 +#define IA64_NUM_DBG_REGS 8 + +#define DEFAULT_MAP_BASE __IA64_UL_CONST(0x2000000000000000) +#define DEFAULT_TASK_SIZE __IA64_UL_CONST(0xa000000000000000) + +/* + * TASK_SIZE really is a mis-named. It really is the maximum user + * space address (plus one). On IA-64, there are five regions of 2TB + * each (assuming 8KB page size), for a total of 8TB of user virtual + * address space. + */ +#define TASK_SIZE DEFAULT_TASK_SIZE + +/* + * This decides where the kernel will search for a free chunk of vm + * space during mmap's. + */ +#define TASK_UNMAPPED_BASE (current->thread.map_base) + +#define IA64_THREAD_FPH_VALID (__IA64_UL(1) << 0) /* floating-point high state valid? */ +#define IA64_THREAD_DBG_VALID (__IA64_UL(1) << 1) /* debug registers valid? */ +#define IA64_THREAD_PM_VALID (__IA64_UL(1) << 2) /* performance registers valid? */ +#define IA64_THREAD_UAC_NOPRINT (__IA64_UL(1) << 3) /* don't log unaligned accesses */ +#define IA64_THREAD_UAC_SIGBUS (__IA64_UL(1) << 4) /* generate SIGBUS on unaligned acc. */ +#define IA64_THREAD_MIGRATION (__IA64_UL(1) << 5) /* require migration + sync at ctx sw */ +#define IA64_THREAD_FPEMU_NOPRINT (__IA64_UL(1) << 6) /* don't log any fpswa faults */ +#define IA64_THREAD_FPEMU_SIGFPE (__IA64_UL(1) << 7) /* send a SIGFPE for fpswa faults */ + +#define IA64_THREAD_UAC_SHIFT 3 +#define IA64_THREAD_UAC_MASK (IA64_THREAD_UAC_NOPRINT | IA64_THREAD_UAC_SIGBUS) +#define IA64_THREAD_FPEMU_SHIFT 6 +#define IA64_THREAD_FPEMU_MASK (IA64_THREAD_FPEMU_NOPRINT | IA64_THREAD_FPEMU_SIGFPE) + + +/* + * This shift should be large enough to be able to represent 1000000000/itc_freq with good + * accuracy while being small enough to fit 10*1000000000< +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#ifdef CONFIG_NUMA +#include +#endif + +/* like above but expressed as bitfields for more efficient access: */ +struct ia64_psr { + __u64 reserved0 : 1; + __u64 be : 1; + __u64 up : 1; + __u64 ac : 1; + __u64 mfl : 1; + __u64 mfh : 1; + __u64 reserved1 : 7; + __u64 ic : 1; + __u64 i : 1; + __u64 pk : 1; + __u64 reserved2 : 1; + __u64 dt : 1; + __u64 dfl : 1; + __u64 dfh : 1; + __u64 sp : 1; + __u64 pp : 1; + __u64 di : 1; + __u64 si : 1; + __u64 db : 1; + __u64 lp : 1; + __u64 tb : 1; + __u64 rt : 1; + __u64 reserved3 : 4; + __u64 cpl : 2; + __u64 is : 1; + __u64 mc : 1; + __u64 it : 1; + __u64 id : 1; + __u64 da : 1; + __u64 dd : 1; + __u64 ss : 1; + __u64 ri : 2; + __u64 ed : 1; + __u64 bn : 1; + __u64 reserved4 : 19; +}; + +union ia64_isr { + __u64 val; + struct { + __u64 code : 16; + __u64 vector : 8; + __u64 reserved1 : 8; + __u64 x : 1; + __u64 w : 1; + __u64 r : 1; + __u64 na : 1; + __u64 sp : 1; + __u64 rs : 1; + __u64 ir : 1; + __u64 ni : 1; + __u64 so : 1; + __u64 ei : 2; + __u64 ed : 1; + __u64 reserved2 : 20; + }; +}; + +union ia64_lid { + __u64 val; + struct { + __u64 rv : 16; + __u64 eid : 8; + __u64 id : 8; + __u64 ig : 32; + }; +}; + +union ia64_tpr { + __u64 val; + struct { + __u64 ig0 : 4; + __u64 mic : 4; + __u64 rsv : 8; + __u64 mmi : 1; + __u64 ig1 : 47; + }; +}; + +union ia64_itir { + __u64 val; + struct { + __u64 rv3 : 2; /* 0-1 */ + __u64 ps : 6; /* 2-7 */ + __u64 key : 24; /* 8-31 */ + __u64 rv4 : 32; /* 32-63 */ + }; +}; + +union ia64_rr { + __u64 val; + struct { + __u64 ve : 1; /* enable hw walker */ + __u64 reserved0: 1; /* reserved */ + __u64 ps : 6; /* log page size */ + __u64 rid : 24; /* region id */ + __u64 reserved1: 32; /* reserved */ + }; +}; + +/* + * CPU type, hardware bug flags, and per-CPU state. Frequently used + * state comes earlier: + */ +struct cpuinfo_ia64 { + unsigned int softirq_pending; + unsigned long itm_delta; /* # of clock cycles between clock ticks */ + unsigned long itm_next; /* interval timer mask value to use for next clock tick */ + unsigned long nsec_per_cyc; /* (1000000000<thread.flags = (((task)->thread.flags & ~IA64_THREAD_UAC_MASK) \ + | (((value) << IA64_THREAD_UAC_SHIFT) & IA64_THREAD_UAC_MASK)); \ + 0; \ +}) +#define GET_UNALIGN_CTL(task,addr) \ +({ \ + put_user(((task)->thread.flags & IA64_THREAD_UAC_MASK) >> IA64_THREAD_UAC_SHIFT, \ + (int __user *) (addr)); \ +}) + +#define SET_FPEMU_CTL(task,value) \ +({ \ + (task)->thread.flags = (((task)->thread.flags & ~IA64_THREAD_FPEMU_MASK) \ + | (((value) << IA64_THREAD_FPEMU_SHIFT) & IA64_THREAD_FPEMU_MASK)); \ + 0; \ +}) +#define GET_FPEMU_CTL(task,addr) \ +({ \ + put_user(((task)->thread.flags & IA64_THREAD_FPEMU_MASK) >> IA64_THREAD_FPEMU_SHIFT, \ + (int __user *) (addr)); \ +}) + +struct thread_struct { + __u32 flags; /* various thread flags (see IA64_THREAD_*) */ + /* writing on_ustack is performance-critical, so it's worth spending 8 bits on it... */ + __u8 on_ustack; /* executing on user-stacks? */ + __u8 pad[3]; + __u64 ksp; /* kernel stack pointer */ + __u64 map_base; /* base address for get_unmapped_area() */ + __u64 rbs_bot; /* the base address for the RBS */ + int last_fph_cpu; /* CPU that may hold the contents of f32-f127 */ + unsigned long dbr[IA64_NUM_DBG_REGS]; + unsigned long ibr[IA64_NUM_DBG_REGS]; + struct ia64_fpreg fph[96]; /* saved/loaded on demand */ +}; + +#define INIT_THREAD { \ + .flags = 0, \ + .on_ustack = 0, \ + .ksp = 0, \ + .map_base = DEFAULT_MAP_BASE, \ + .rbs_bot = STACK_TOP - DEFAULT_USER_STACK_SIZE, \ + .last_fph_cpu = -1, \ + .dbr = {0, }, \ + .ibr = {0, }, \ + .fph = {{{{0}}}, } \ +} + +#define start_thread(regs,new_ip,new_sp) do { \ + regs->cr_ipsr = ((regs->cr_ipsr | (IA64_PSR_BITS_TO_SET | IA64_PSR_CPL)) \ + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_RI | IA64_PSR_IS)); \ + regs->cr_iip = new_ip; \ + regs->ar_rsc = 0xf; /* eager mode, privilege level 3 */ \ + regs->ar_rnat = 0; \ + regs->ar_bspstore = current->thread.rbs_bot; \ + regs->ar_fpsr = FPSR_DEFAULT; \ + regs->loadrs = 0; \ + regs->r8 = get_dumpable(current->mm); /* set "don't zap registers" flag */ \ + regs->r12 = new_sp - 16; /* allocate 16 byte scratch area */ \ + if (unlikely(get_dumpable(current->mm) != SUID_DUMP_USER)) { \ + /* \ + * Zap scratch regs to avoid leaking bits between processes with different \ + * uid/privileges. \ + */ \ + regs->ar_pfs = 0; regs->b0 = 0; regs->pr = 0; \ + regs->r1 = 0; regs->r9 = 0; regs->r11 = 0; regs->r13 = 0; regs->r15 = 0; \ + } \ +} while (0) + +/* Forward declarations, a strange C thing... */ +struct mm_struct; +struct task_struct; + +/* Get wait channel for task P. */ +extern unsigned long __get_wchan (struct task_struct *p); + +/* Return instruction pointer of blocked task TSK. */ +#define KSTK_EIP(tsk) \ + ({ \ + struct pt_regs *_regs = task_pt_regs(tsk); \ + _regs->cr_iip + ia64_psr(_regs)->ri; \ + }) + +/* Return stack pointer of blocked task TSK. */ +#define KSTK_ESP(tsk) ((tsk)->thread.ksp) + +extern void ia64_getreg_unknown_kr (void); +extern void ia64_setreg_unknown_kr (void); + +#define ia64_get_kr(regnum) \ +({ \ + unsigned long r = 0; \ + \ + switch (regnum) { \ + case 0: r = ia64_getreg(_IA64_REG_AR_KR0); break; \ + case 1: r = ia64_getreg(_IA64_REG_AR_KR1); break; \ + case 2: r = ia64_getreg(_IA64_REG_AR_KR2); break; \ + case 3: r = ia64_getreg(_IA64_REG_AR_KR3); break; \ + case 4: r = ia64_getreg(_IA64_REG_AR_KR4); break; \ + case 5: r = ia64_getreg(_IA64_REG_AR_KR5); break; \ + case 6: r = ia64_getreg(_IA64_REG_AR_KR6); break; \ + case 7: r = ia64_getreg(_IA64_REG_AR_KR7); break; \ + default: ia64_getreg_unknown_kr(); break; \ + } \ + r; \ +}) + +#define ia64_set_kr(regnum, r) \ +({ \ + switch (regnum) { \ + case 0: ia64_setreg(_IA64_REG_AR_KR0, r); break; \ + case 1: ia64_setreg(_IA64_REG_AR_KR1, r); break; \ + case 2: ia64_setreg(_IA64_REG_AR_KR2, r); break; \ + case 3: ia64_setreg(_IA64_REG_AR_KR3, r); break; \ + case 4: ia64_setreg(_IA64_REG_AR_KR4, r); break; \ + case 5: ia64_setreg(_IA64_REG_AR_KR5, r); break; \ + case 6: ia64_setreg(_IA64_REG_AR_KR6, r); break; \ + case 7: ia64_setreg(_IA64_REG_AR_KR7, r); break; \ + default: ia64_setreg_unknown_kr(); break; \ + } \ +}) + +/* + * The following three macros can't be inline functions because we don't have struct + * task_struct at this point. + */ + +/* + * Return TRUE if task T owns the fph partition of the CPU we're running on. + * Must be called from code that has preemption disabled. + */ +#define ia64_is_local_fpu_owner(t) \ +({ \ + struct task_struct *__ia64_islfo_task = (t); \ + (__ia64_islfo_task->thread.last_fph_cpu == smp_processor_id() \ + && __ia64_islfo_task == (struct task_struct *) ia64_get_kr(IA64_KR_FPU_OWNER)); \ +}) + +/* + * Mark task T as owning the fph partition of the CPU we're running on. + * Must be called from code that has preemption disabled. + */ +#define ia64_set_local_fpu_owner(t) do { \ + struct task_struct *__ia64_slfo_task = (t); \ + __ia64_slfo_task->thread.last_fph_cpu = smp_processor_id(); \ + ia64_set_kr(IA64_KR_FPU_OWNER, (unsigned long) __ia64_slfo_task); \ +} while (0) + +/* Mark the fph partition of task T as being invalid on all CPUs. */ +#define ia64_drop_fpu(t) ((t)->thread.last_fph_cpu = -1) + +extern void __ia64_init_fpu (void); +extern void __ia64_save_fpu (struct ia64_fpreg *fph); +extern void __ia64_load_fpu (struct ia64_fpreg *fph); +extern void ia64_save_debug_regs (unsigned long *save_area); +extern void ia64_load_debug_regs (unsigned long *save_area); + +#define ia64_fph_enable() do { ia64_rsm(IA64_PSR_DFH); ia64_srlz_d(); } while (0) +#define ia64_fph_disable() do { ia64_ssm(IA64_PSR_DFH); ia64_srlz_d(); } while (0) + +/* load fp 0.0 into fph */ +static inline void +ia64_init_fpu (void) { + ia64_fph_enable(); + __ia64_init_fpu(); + ia64_fph_disable(); +} + +/* save f32-f127 at FPH */ +static inline void +ia64_save_fpu (struct ia64_fpreg *fph) { + ia64_fph_enable(); + __ia64_save_fpu(fph); + ia64_fph_disable(); +} + +/* load f32-f127 from FPH */ +static inline void +ia64_load_fpu (struct ia64_fpreg *fph) { + ia64_fph_enable(); + __ia64_load_fpu(fph); + ia64_fph_disable(); +} + +static inline __u64 +ia64_clear_ic (void) +{ + __u64 psr; + psr = ia64_getreg(_IA64_REG_PSR); + ia64_stop(); + ia64_rsm(IA64_PSR_I | IA64_PSR_IC); + ia64_srlz_i(); + return psr; +} + +/* + * Restore the psr. + */ +static inline void +ia64_set_psr (__u64 psr) +{ + ia64_stop(); + ia64_setreg(_IA64_REG_PSR_L, psr); + ia64_srlz_i(); +} + +/* + * Insert a translation into an instruction and/or data translation + * register. + */ +static inline void +ia64_itr (__u64 target_mask, __u64 tr_num, + __u64 vmaddr, __u64 pte, + __u64 log_page_size) +{ + ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2)); + ia64_setreg(_IA64_REG_CR_IFA, vmaddr); + ia64_stop(); + if (target_mask & 0x1) + ia64_itri(tr_num, pte); + if (target_mask & 0x2) + ia64_itrd(tr_num, pte); +} + +/* + * Insert a translation into the instruction and/or data translation + * cache. + */ +static inline void +ia64_itc (__u64 target_mask, __u64 vmaddr, __u64 pte, + __u64 log_page_size) +{ + ia64_setreg(_IA64_REG_CR_ITIR, (log_page_size << 2)); + ia64_setreg(_IA64_REG_CR_IFA, vmaddr); + ia64_stop(); + /* as per EAS2.6, itc must be the last instruction in an instruction group */ + if (target_mask & 0x1) + ia64_itci(pte); + if (target_mask & 0x2) + ia64_itcd(pte); +} + +/* + * Purge a range of addresses from instruction and/or data translation + * register(s). + */ +static inline void +ia64_ptr (__u64 target_mask, __u64 vmaddr, __u64 log_size) +{ + if (target_mask & 0x1) + ia64_ptri(vmaddr, (log_size << 2)); + if (target_mask & 0x2) + ia64_ptrd(vmaddr, (log_size << 2)); +} + +/* Set the interrupt vector address. The address must be suitably aligned (32KB). */ +static inline void +ia64_set_iva (void *ivt_addr) +{ + ia64_setreg(_IA64_REG_CR_IVA, (__u64) ivt_addr); + ia64_srlz_i(); +} + +/* Set the page table address and control bits. */ +static inline void +ia64_set_pta (__u64 pta) +{ + /* Note: srlz.i implies srlz.d */ + ia64_setreg(_IA64_REG_CR_PTA, pta); + ia64_srlz_i(); +} + +static inline void +ia64_eoi (void) +{ + ia64_setreg(_IA64_REG_CR_EOI, 0); + ia64_srlz_d(); +} + +#define cpu_relax() ia64_hint(ia64_hint_pause) + +static inline int +ia64_get_irr(unsigned int vector) +{ + unsigned int reg = vector / 64; + unsigned int bit = vector % 64; + unsigned long irr; + + switch (reg) { + case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break; + case 1: irr = ia64_getreg(_IA64_REG_CR_IRR1); break; + case 2: irr = ia64_getreg(_IA64_REG_CR_IRR2); break; + case 3: irr = ia64_getreg(_IA64_REG_CR_IRR3); break; + } + + return test_bit(bit, &irr); +} + +static inline void +ia64_set_lrr0 (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_LRR0, val); + ia64_srlz_d(); +} + +static inline void +ia64_set_lrr1 (unsigned long val) +{ + ia64_setreg(_IA64_REG_CR_LRR1, val); + ia64_srlz_d(); +} + + +/* + * Given the address to which a spill occurred, return the unat bit + * number that corresponds to this address. + */ +static inline __u64 +ia64_unat_pos (void *spill_addr) +{ + return ((__u64) spill_addr >> 3) & 0x3f; +} + +/* + * Set the NaT bit of an integer register which was spilled at address + * SPILL_ADDR. UNAT is the mask to be updated. + */ +static inline void +ia64_set_unat (__u64 *unat, void *spill_addr, unsigned long nat) +{ + __u64 bit = ia64_unat_pos(spill_addr); + __u64 mask = 1UL << bit; + + *unat = (*unat & ~mask) | (nat << bit); +} + +static inline __u64 +ia64_get_ivr (void) +{ + __u64 r; + ia64_srlz_d(); + r = ia64_getreg(_IA64_REG_CR_IVR); + ia64_srlz_d(); + return r; +} + +static inline void +ia64_set_dbr (__u64 regnum, __u64 value) +{ + __ia64_set_dbr(regnum, value); +#ifdef CONFIG_ITANIUM + ia64_srlz_d(); +#endif +} + +static inline __u64 +ia64_get_dbr (__u64 regnum) +{ + __u64 retval; + + retval = __ia64_get_dbr(regnum); +#ifdef CONFIG_ITANIUM + ia64_srlz_d(); +#endif + return retval; +} + +static inline __u64 +ia64_rotr (__u64 w, __u64 n) +{ + return (w >> n) | (w << (64 - n)); +} + +#define ia64_rotl(w,n) ia64_rotr((w), (64) - (n)) + +/* + * Take a mapped kernel address and return the equivalent address + * in the region 7 identity mapped virtual area. + */ +static inline void * +ia64_imva (void *addr) +{ + void *result; + result = (void *) ia64_tpa(addr); + return __va(result); +} + +#define ARCH_HAS_PREFETCH +#define ARCH_HAS_PREFETCHW +#define PREFETCH_STRIDE L1_CACHE_BYTES + +static inline void +prefetch (const void *x) +{ + ia64_lfetch(ia64_lfhint_none, x); +} + +static inline void +prefetchw (const void *x) +{ + ia64_lfetch_excl(ia64_lfhint_none, x); +} + +extern unsigned long boot_option_idle_override; + +enum idle_boot_override {IDLE_NO_OVERRIDE=0, IDLE_HALT, IDLE_FORCE_MWAIT, + IDLE_NOMWAIT, IDLE_POLL}; + +void default_idle(void); + +#endif /* !__ASSEMBLY__ */ + +#endif /* _ASM_IA64_PROCESSOR_H */ diff --git a/arch/ia64/include/asm/ptrace.h b/arch/ia64/include/asm/ptrace.h new file mode 100644 index 000000000000..402874489890 --- /dev/null +++ b/arch/ia64/include/asm/ptrace.h @@ -0,0 +1,146 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2003 Intel Co + * Suresh Siddha + * Fenghua Yu + * Arun Sharma + * + * 12/07/98 S. Eranian added pt_regs & switch_stack + * 12/21/98 D. Mosberger updated to match latest code + * 6/17/99 D. Mosberger added second unat member to "struct switch_stack" + * + */ +#ifndef _ASM_IA64_PTRACE_H +#define _ASM_IA64_PTRACE_H + +#ifndef ASM_OFFSETS_C +#include +#endif +#include + +/* + * Base-2 logarithm of number of pages to allocate per task structure + * (including register backing store and memory stack): + */ +#if defined(CONFIG_IA64_PAGE_SIZE_4KB) +# define KERNEL_STACK_SIZE_ORDER 3 +#elif defined(CONFIG_IA64_PAGE_SIZE_8KB) +# define KERNEL_STACK_SIZE_ORDER 2 +#elif defined(CONFIG_IA64_PAGE_SIZE_16KB) +# define KERNEL_STACK_SIZE_ORDER 1 +#else +# define KERNEL_STACK_SIZE_ORDER 0 +#endif + +#define IA64_RBS_OFFSET ((IA64_TASK_SIZE + IA64_THREAD_INFO_SIZE + 31) & ~31) +#define IA64_STK_OFFSET ((1 << KERNEL_STACK_SIZE_ORDER)*PAGE_SIZE) + +#define KERNEL_STACK_SIZE IA64_STK_OFFSET + +#ifndef __ASSEMBLY__ + +#include +#include + +/* + * We use the ia64_psr(regs)->ri to determine which of the three + * instructions in bundle (16 bytes) took the sample. Generate + * the canonical representation by adding to instruction pointer. + */ +# define instruction_pointer(regs) ((regs)->cr_iip + ia64_psr(regs)->ri) +# define instruction_pointer_set(regs, val) \ +({ \ + ia64_psr(regs)->ri = (val & 0xf); \ + regs->cr_iip = (val & ~0xfULL); \ +}) + +static inline unsigned long user_stack_pointer(struct pt_regs *regs) +{ + return regs->r12; +} + +static inline int is_syscall_success(struct pt_regs *regs) +{ + return regs->r10 != -1; +} + +static inline long regs_return_value(struct pt_regs *regs) +{ + if (is_syscall_success(regs)) + return regs->r8; + else + return -regs->r8; +} + +/* Conserve space in histogram by encoding slot bits in address + * bits 2 and 3 rather than bits 0 and 1. + */ +#define profile_pc(regs) \ +({ \ + unsigned long __ip = instruction_pointer(regs); \ + (__ip & ~3UL) + ((__ip & 3UL) << 2); \ +}) + + /* given a pointer to a task_struct, return the user's pt_regs */ +# define task_pt_regs(t) (((struct pt_regs *) ((char *) (t) + IA64_STK_OFFSET)) - 1) +# define ia64_psr(regs) ((struct ia64_psr *) &(regs)->cr_ipsr) +# define user_mode(regs) (((struct ia64_psr *) &(regs)->cr_ipsr)->cpl != 0) +# define user_stack(task,regs) ((long) regs - (long) task == IA64_STK_OFFSET - sizeof(*regs)) +# define fsys_mode(task,regs) \ + ({ \ + struct task_struct *_task = (task); \ + struct pt_regs *_regs = (regs); \ + !user_mode(_regs) && user_stack(_task, _regs); \ + }) + + /* + * System call handlers that, upon successful completion, need to return a negative value + * should call force_successful_syscall_return() right before returning. On architectures + * where the syscall convention provides for a separate error flag (e.g., alpha, ia64, + * ppc{,64}, sparc{,64}, possibly others), this macro can be used to ensure that the error + * flag will not get set. On architectures which do not support a separate error flag, + * the macro is a no-op and the spurious error condition needs to be filtered out by some + * other means (e.g., in user-level, by passing an extra argument to the syscall handler, + * or something along those lines). + * + * On ia64, we can clear the user's pt_regs->r8 to force a successful syscall. + */ +# define force_successful_syscall_return() (task_pt_regs(current)->r8 = 0) + + struct task_struct; /* forward decl */ + struct unw_frame_info; /* forward decl */ + + extern unsigned long ia64_get_user_rbs_end (struct task_struct *, struct pt_regs *, + unsigned long *); + extern long ia64_peek (struct task_struct *, struct switch_stack *, unsigned long, + unsigned long, long *); + extern long ia64_poke (struct task_struct *, struct switch_stack *, unsigned long, + unsigned long, long); + extern void ia64_flush_fph (struct task_struct *); + extern void ia64_sync_fph (struct task_struct *); + extern void ia64_sync_krbs(void); + extern long ia64_sync_user_rbs (struct task_struct *, struct switch_stack *, + unsigned long, unsigned long); + + /* get nat bits for scratch registers such that bit N==1 iff scratch register rN is a NaT */ + extern unsigned long ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat); + /* put nat bits for scratch registers such that scratch register rN is a NaT iff bit N==1 */ + extern unsigned long ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat); + + extern void ia64_increment_ip (struct pt_regs *pt); + extern void ia64_decrement_ip (struct pt_regs *pt); + + extern void ia64_ptrace_stop(void); + #define arch_ptrace_stop() \ + ia64_ptrace_stop() + #define arch_ptrace_stop_needed() \ + (!test_thread_flag(TIF_RESTORE_RSE)) + + #define arch_has_single_step() (1) + #define arch_has_block_step() (1) + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_PTRACE_H */ diff --git a/arch/ia64/include/asm/sal.h b/arch/ia64/include/asm/sal.h new file mode 100644 index 000000000000..22749a201e92 --- /dev/null +++ b/arch/ia64/include/asm/sal.h @@ -0,0 +1,919 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_SAL_H +#define _ASM_IA64_SAL_H + +/* + * System Abstraction Layer definitions. + * + * This is based on version 2.5 of the manual "IA-64 System + * Abstraction Layer". + * + * Copyright (C) 2001 Intel + * Copyright (C) 2002 Jenna Hall + * Copyright (C) 2001 Fred Lewis + * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Srinivasa Prasad Thirumalachar + * + * 02/01/04 J. Hall Updated Error Record Structures to conform to July 2001 + * revision of the SAL spec. + * 01/01/03 fvlewis Updated Error Record Structures to conform with Nov. 2000 + * revision of the SAL spec. + * 99/09/29 davidm Updated for SAL 2.6. + * 00/03/29 cfleck Updated SAL Error Logging info for processor (SAL 2.6) + * (plus examples of platform error info structures from smariset @ Intel) + */ + +#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK_BIT 0 +#define IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT_BIT 1 +#define IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT_BIT 2 +#define IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT_BIT 3 + +#define IA64_SAL_PLATFORM_FEATURE_BUS_LOCK (1< +#include +#include + +#include +#include + +extern unsigned long sal_systab_phys; +extern spinlock_t sal_lock; + +/* SAL spec _requires_ eight args for each call. */ +#define __IA64_FW_CALL(entry,result,a0,a1,a2,a3,a4,a5,a6,a7) \ + result = (*entry)(a0,a1,a2,a3,a4,a5,a6,a7) + +# define IA64_FW_CALL(entry,result,args...) do { \ + unsigned long __ia64_sc_flags; \ + struct ia64_fpreg __ia64_sc_fr[6]; \ + ia64_save_scratch_fpregs(__ia64_sc_fr); \ + spin_lock_irqsave(&sal_lock, __ia64_sc_flags); \ + __IA64_FW_CALL(entry, result, args); \ + spin_unlock_irqrestore(&sal_lock, __ia64_sc_flags); \ + ia64_load_scratch_fpregs(__ia64_sc_fr); \ +} while (0) + +# define SAL_CALL(result,args...) \ + IA64_FW_CALL(ia64_sal, result, args); + +# define SAL_CALL_NOLOCK(result,args...) do { \ + unsigned long __ia64_scn_flags; \ + struct ia64_fpreg __ia64_scn_fr[6]; \ + ia64_save_scratch_fpregs(__ia64_scn_fr); \ + local_irq_save(__ia64_scn_flags); \ + __IA64_FW_CALL(ia64_sal, result, args); \ + local_irq_restore(__ia64_scn_flags); \ + ia64_load_scratch_fpregs(__ia64_scn_fr); \ +} while (0) + +# define SAL_CALL_REENTRANT(result,args...) do { \ + struct ia64_fpreg __ia64_scs_fr[6]; \ + ia64_save_scratch_fpregs(__ia64_scs_fr); \ + preempt_disable(); \ + __IA64_FW_CALL(ia64_sal, result, args); \ + preempt_enable(); \ + ia64_load_scratch_fpregs(__ia64_scs_fr); \ +} while (0) + +#define SAL_SET_VECTORS 0x01000000 +#define SAL_GET_STATE_INFO 0x01000001 +#define SAL_GET_STATE_INFO_SIZE 0x01000002 +#define SAL_CLEAR_STATE_INFO 0x01000003 +#define SAL_MC_RENDEZ 0x01000004 +#define SAL_MC_SET_PARAMS 0x01000005 +#define SAL_REGISTER_PHYSICAL_ADDR 0x01000006 + +#define SAL_CACHE_FLUSH 0x01000008 +#define SAL_CACHE_INIT 0x01000009 +#define SAL_PCI_CONFIG_READ 0x01000010 +#define SAL_PCI_CONFIG_WRITE 0x01000011 +#define SAL_FREQ_BASE 0x01000012 +#define SAL_PHYSICAL_ID_INFO 0x01000013 + +#define SAL_UPDATE_PAL 0x01000020 + +struct ia64_sal_retval { + /* + * A zero status value indicates call completed without error. + * A negative status value indicates reason of call failure. + * A positive status value indicates success but an + * informational value should be printed (e.g., "reboot for + * change to take effect"). + */ + long status; + unsigned long v0; + unsigned long v1; + unsigned long v2; +}; + +typedef struct ia64_sal_retval (*ia64_sal_handler) (u64, ...); + +enum { + SAL_FREQ_BASE_PLATFORM = 0, + SAL_FREQ_BASE_INTERVAL_TIMER = 1, + SAL_FREQ_BASE_REALTIME_CLOCK = 2 +}; + +/* + * The SAL system table is followed by a variable number of variable + * length descriptors. The structure of these descriptors follows + * below. + * The defininition follows SAL specs from July 2000 + */ +struct ia64_sal_systab { + u8 signature[4]; /* should be "SST_" */ + u32 size; /* size of this table in bytes */ + u8 sal_rev_minor; + u8 sal_rev_major; + u16 entry_count; /* # of entries in variable portion */ + u8 checksum; + u8 reserved1[7]; + u8 sal_a_rev_minor; + u8 sal_a_rev_major; + u8 sal_b_rev_minor; + u8 sal_b_rev_major; + /* oem_id & product_id: terminating NUL is missing if string is exactly 32 bytes long. */ + u8 oem_id[32]; + u8 product_id[32]; /* ASCII product id */ + u8 reserved2[8]; +}; + +enum sal_systab_entry_type { + SAL_DESC_ENTRY_POINT = 0, + SAL_DESC_MEMORY = 1, + SAL_DESC_PLATFORM_FEATURE = 2, + SAL_DESC_TR = 3, + SAL_DESC_PTC = 4, + SAL_DESC_AP_WAKEUP = 5 +}; + +/* + * Entry type: Size: + * 0 48 + * 1 32 + * 2 16 + * 3 32 + * 4 16 + * 5 16 + */ +#define SAL_DESC_SIZE(type) "\060\040\020\040\020\020"[(unsigned) type] + +typedef struct ia64_sal_desc_entry_point { + u8 type; + u8 reserved1[7]; + u64 pal_proc; + u64 sal_proc; + u64 gp; + u8 reserved2[16]; +}ia64_sal_desc_entry_point_t; + +typedef struct ia64_sal_desc_memory { + u8 type; + u8 used_by_sal; /* needs to be mapped for SAL? */ + u8 mem_attr; /* current memory attribute setting */ + u8 access_rights; /* access rights set up by SAL */ + u8 mem_attr_mask; /* mask of supported memory attributes */ + u8 reserved1; + u8 mem_type; /* memory type */ + u8 mem_usage; /* memory usage */ + u64 addr; /* physical address of memory */ + u32 length; /* length (multiple of 4KB pages) */ + u32 reserved2; + u8 oem_reserved[8]; +} ia64_sal_desc_memory_t; + +typedef struct ia64_sal_desc_platform_feature { + u8 type; + u8 feature_mask; + u8 reserved1[14]; +} ia64_sal_desc_platform_feature_t; + +typedef struct ia64_sal_desc_tr { + u8 type; + u8 tr_type; /* 0 == instruction, 1 == data */ + u8 regnum; /* translation register number */ + u8 reserved1[5]; + u64 addr; /* virtual address of area covered */ + u64 page_size; /* encoded page size */ + u8 reserved2[8]; +} ia64_sal_desc_tr_t; + +typedef struct ia64_sal_desc_ptc { + u8 type; + u8 reserved1[3]; + u32 num_domains; /* # of coherence domains */ + u64 domain_info; /* physical address of domain info table */ +} ia64_sal_desc_ptc_t; + +typedef struct ia64_sal_ptc_domain_info { + u64 proc_count; /* number of processors in domain */ + u64 proc_list; /* physical address of LID array */ +} ia64_sal_ptc_domain_info_t; + +typedef struct ia64_sal_ptc_domain_proc_entry { + u64 id : 8; /* id of processor */ + u64 eid : 8; /* eid of processor */ +} ia64_sal_ptc_domain_proc_entry_t; + + +#define IA64_SAL_AP_EXTERNAL_INT 0 + +typedef struct ia64_sal_desc_ap_wakeup { + u8 type; + u8 mechanism; /* 0 == external interrupt */ + u8 reserved1[6]; + u64 vector; /* interrupt vector in range 0x10-0xff */ +} ia64_sal_desc_ap_wakeup_t ; + +extern ia64_sal_handler ia64_sal; +extern struct ia64_sal_desc_ptc *ia64_ptc_domain_info; + +extern unsigned short sal_revision; /* supported SAL spec revision */ +extern unsigned short sal_version; /* SAL version; OEM dependent */ +#define SAL_VERSION_CODE(major, minor) ((bin2bcd(major) << 8) | bin2bcd(minor)) + +extern const char *ia64_sal_strerror (long status); +extern void ia64_sal_init (struct ia64_sal_systab *sal_systab); + +/* SAL information type encodings */ +enum { + SAL_INFO_TYPE_MCA = 0, /* Machine check abort information */ + SAL_INFO_TYPE_INIT = 1, /* Init information */ + SAL_INFO_TYPE_CMC = 2, /* Corrected machine check information */ + SAL_INFO_TYPE_CPE = 3 /* Corrected platform error information */ +}; + +/* Encodings for machine check parameter types */ +enum { + SAL_MC_PARAM_RENDEZ_INT = 1, /* Rendezvous interrupt */ + SAL_MC_PARAM_RENDEZ_WAKEUP = 2, /* Wakeup */ + SAL_MC_PARAM_CPE_INT = 3 /* Corrected Platform Error Int */ +}; + +/* Encodings for rendezvous mechanisms */ +enum { + SAL_MC_PARAM_MECHANISM_INT = 1, /* Use interrupt */ + SAL_MC_PARAM_MECHANISM_MEM = 2 /* Use memory synchronization variable*/ +}; + +/* Encodings for vectors which can be registered by the OS with SAL */ +enum { + SAL_VECTOR_OS_MCA = 0, + SAL_VECTOR_OS_INIT = 1, + SAL_VECTOR_OS_BOOT_RENDEZ = 2 +}; + +/* Encodings for mca_opt parameter sent to SAL_MC_SET_PARAMS */ +#define SAL_MC_PARAM_RZ_ALWAYS 0x1 +#define SAL_MC_PARAM_BINIT_ESCALATE 0x10 + +/* + * Definition of the SAL Error Log from the SAL spec + */ + +/* SAL Error Record Section GUID Definitions */ +#define SAL_PROC_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf1, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_MEM_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf2, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_SEL_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf3, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_PCI_BUS_ERR_SECT_GUID \ + EFI_GUID(0xe429faf4, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID \ + EFI_GUID(0xe429faf5, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_PCI_COMP_ERR_SECT_GUID \ + EFI_GUID(0xe429faf6, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_SPECIFIC_ERR_SECT_GUID \ + EFI_GUID(0xe429faf7, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_HOST_CTLR_ERR_SECT_GUID \ + EFI_GUID(0xe429faf8, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define SAL_PLAT_BUS_ERR_SECT_GUID \ + EFI_GUID(0xe429faf9, 0x3cb7, 0x11d4, 0xbc, 0xa7, 0x0, 0x80, 0xc7, 0x3c, 0x88, 0x81) +#define PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID \ + EFI_GUID(0x6cb0a200, 0x893a, 0x11da, 0x96, 0xd2, 0x0, 0x10, 0x83, 0xff, \ + 0xca, 0x4d) + +#define MAX_CACHE_ERRORS 6 +#define MAX_TLB_ERRORS 6 +#define MAX_BUS_ERRORS 1 + +/* Definition of version according to SAL spec for logging purposes */ +typedef struct sal_log_revision { + u8 minor; /* BCD (0..99) */ + u8 major; /* BCD (0..99) */ +} sal_log_revision_t; + +/* Definition of timestamp according to SAL spec for logging purposes */ +typedef struct sal_log_timestamp { + u8 slh_second; /* Second (0..59) */ + u8 slh_minute; /* Minute (0..59) */ + u8 slh_hour; /* Hour (0..23) */ + u8 slh_reserved; + u8 slh_day; /* Day (1..31) */ + u8 slh_month; /* Month (1..12) */ + u8 slh_year; /* Year (00..99) */ + u8 slh_century; /* Century (19, 20, 21, ...) */ +} sal_log_timestamp_t; + +/* Definition of log record header structures */ +typedef struct sal_log_record_header { + u64 id; /* Unique monotonically increasing ID */ + sal_log_revision_t revision; /* Major and Minor revision of header */ + u8 severity; /* Error Severity */ + u8 validation_bits; /* 0: platform_guid, 1: !timestamp */ + u32 len; /* Length of this error log in bytes */ + sal_log_timestamp_t timestamp; /* Timestamp */ + efi_guid_t platform_guid; /* Unique OEM Platform ID */ +} sal_log_record_header_t; + +#define sal_log_severity_recoverable 0 +#define sal_log_severity_fatal 1 +#define sal_log_severity_corrected 2 + +/* + * Error Recovery Info (ERI) bit decode. From SAL Spec section B.2.2 Table B-3 + * Error Section Error_Recovery_Info Field Definition. + */ +#define ERI_NOT_VALID 0x0 /* Error Recovery Field is not valid */ +#define ERI_NOT_ACCESSIBLE 0x30 /* Resource not accessible */ +#define ERI_CONTAINMENT_WARN 0x22 /* Corrupt data propagated */ +#define ERI_UNCORRECTED_ERROR 0x20 /* Uncorrected error */ +#define ERI_COMPONENT_RESET 0x24 /* Component must be reset */ +#define ERI_CORR_ERROR_LOG 0x21 /* Corrected error, needs logging */ +#define ERI_CORR_ERROR_THRESH 0x29 /* Corrected error threshold exceeded */ + +/* Definition of log section header structures */ +typedef struct sal_log_sec_header { + efi_guid_t guid; /* Unique Section ID */ + sal_log_revision_t revision; /* Major and Minor revision of Section */ + u8 error_recovery_info; /* Platform error recovery status */ + u8 reserved; + u32 len; /* Section length */ +} sal_log_section_hdr_t; + +typedef struct sal_log_mod_error_info { + struct { + u64 check_info : 1, + requestor_identifier : 1, + responder_identifier : 1, + target_identifier : 1, + precise_ip : 1, + reserved : 59; + } valid; + u64 check_info; + u64 requestor_identifier; + u64 responder_identifier; + u64 target_identifier; + u64 precise_ip; +} sal_log_mod_error_info_t; + +typedef struct sal_processor_static_info { + struct { + u64 minstate : 1, + br : 1, + cr : 1, + ar : 1, + rr : 1, + fr : 1, + reserved : 58; + } valid; + struct pal_min_state_area min_state_area; + u64 br[8]; + u64 cr[128]; + u64 ar[128]; + u64 rr[8]; + struct ia64_fpreg __attribute__ ((packed)) fr[128]; +} sal_processor_static_info_t; + +struct sal_cpuid_info { + u64 regs[5]; + u64 reserved; +}; + +typedef struct sal_log_processor_info { + sal_log_section_hdr_t header; + struct { + u64 proc_error_map : 1, + proc_state_param : 1, + proc_cr_lid : 1, + psi_static_struct : 1, + num_cache_check : 4, + num_tlb_check : 4, + num_bus_check : 4, + num_reg_file_check : 4, + num_ms_check : 4, + cpuid_info : 1, + reserved1 : 39; + } valid; + u64 proc_error_map; + u64 proc_state_parameter; + u64 proc_cr_lid; + /* + * The rest of this structure consists of variable-length arrays, which can't be + * expressed in C. + */ + sal_log_mod_error_info_t info[]; + /* + * This is what the rest looked like if C supported variable-length arrays: + * + * sal_log_mod_error_info_t cache_check_info[.valid.num_cache_check]; + * sal_log_mod_error_info_t tlb_check_info[.valid.num_tlb_check]; + * sal_log_mod_error_info_t bus_check_info[.valid.num_bus_check]; + * sal_log_mod_error_info_t reg_file_check_info[.valid.num_reg_file_check]; + * sal_log_mod_error_info_t ms_check_info[.valid.num_ms_check]; + * struct sal_cpuid_info cpuid_info; + * sal_processor_static_info_t processor_static_info; + */ +} sal_log_processor_info_t; + +/* Given a sal_log_processor_info_t pointer, return a pointer to the processor_static_info: */ +#define SAL_LPI_PSI_INFO(l) \ +({ sal_log_processor_info_t *_l = (l); \ + ((sal_processor_static_info_t *) \ + ((char *) _l->info + ((_l->valid.num_cache_check + _l->valid.num_tlb_check \ + + _l->valid.num_bus_check + _l->valid.num_reg_file_check \ + + _l->valid.num_ms_check) * sizeof(sal_log_mod_error_info_t) \ + + sizeof(struct sal_cpuid_info)))); \ +}) + +/* platform error log structures */ + +typedef struct sal_log_mem_dev_err_info { + sal_log_section_hdr_t header; + struct { + u64 error_status : 1, + physical_addr : 1, + addr_mask : 1, + node : 1, + card : 1, + module : 1, + bank : 1, + device : 1, + row : 1, + column : 1, + bit_position : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + bus_spec_data : 1, + oem_id : 1, + oem_data : 1, + reserved : 47; + } valid; + u64 error_status; + u64 physical_addr; + u64 addr_mask; + u16 node; + u16 card; + u16 module; + u16 bank; + u16 device; + u16 row; + u16 column; + u16 bit_position; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u64 bus_spec_data; + u8 oem_id[16]; + u8 oem_data[1]; /* Variable length data */ +} sal_log_mem_dev_err_info_t; + +typedef struct sal_log_sel_dev_err_info { + sal_log_section_hdr_t header; + struct { + u64 record_id : 1, + record_type : 1, + generator_id : 1, + evm_rev : 1, + sensor_type : 1, + sensor_num : 1, + event_dir : 1, + event_data1 : 1, + event_data2 : 1, + event_data3 : 1, + reserved : 54; + } valid; + u16 record_id; + u8 record_type; + u8 timestamp[4]; + u16 generator_id; + u8 evm_rev; + u8 sensor_type; + u8 sensor_num; + u8 event_dir; + u8 event_data1; + u8 event_data2; + u8 event_data3; +} sal_log_sel_dev_err_info_t; + +typedef struct sal_log_pci_bus_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + err_type : 1, + bus_id : 1, + bus_address : 1, + bus_data : 1, + bus_cmd : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + oem_data : 1, + reserved : 54; + } valid; + u64 err_status; + u16 err_type; + u16 bus_id; + u32 reserved; + u64 bus_address; + u64 bus_data; + u64 bus_cmd; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u8 oem_data[1]; /* Variable length data */ +} sal_log_pci_bus_err_info_t; + +typedef struct sal_log_smbios_dev_err_info { + sal_log_section_hdr_t header; + struct { + u64 event_type : 1, + length : 1, + time_stamp : 1, + data : 1, + reserved1 : 60; + } valid; + u8 event_type; + u8 length; + u8 time_stamp[6]; + u8 data[1]; /* data of variable length, length == slsmb_length */ +} sal_log_smbios_dev_err_info_t; + +typedef struct sal_log_pci_comp_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + comp_info : 1, + num_mem_regs : 1, + num_io_regs : 1, + reg_data_pairs : 1, + oem_data : 1, + reserved : 58; + } valid; + u64 err_status; + struct { + u16 vendor_id; + u16 device_id; + u8 class_code[3]; + u8 func_num; + u8 dev_num; + u8 bus_num; + u8 seg_num; + u8 reserved[5]; + } comp_info; + u32 num_mem_regs; + u32 num_io_regs; + u64 reg_data_pairs[1]; + /* + * array of address/data register pairs is num_mem_regs + num_io_regs elements + * long. Each array element consists of a u64 address followed by a u64 data + * value. The oem_data array immediately follows the reg_data_pairs array + */ + u8 oem_data[1]; /* Variable length data */ +} sal_log_pci_comp_err_info_t; + +typedef struct sal_log_plat_specific_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + guid : 1, + oem_data : 1, + reserved : 61; + } valid; + u64 err_status; + efi_guid_t guid; + u8 oem_data[1]; /* platform specific variable length data */ +} sal_log_plat_specific_err_info_t; + +typedef struct sal_log_host_ctlr_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + bus_spec_data : 1, + oem_data : 1, + reserved : 58; + } valid; + u64 err_status; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u64 bus_spec_data; + u8 oem_data[1]; /* Variable length OEM data */ +} sal_log_host_ctlr_err_info_t; + +typedef struct sal_log_plat_bus_err_info { + sal_log_section_hdr_t header; + struct { + u64 err_status : 1, + requestor_id : 1, + responder_id : 1, + target_id : 1, + bus_spec_data : 1, + oem_data : 1, + reserved : 58; + } valid; + u64 err_status; + u64 requestor_id; + u64 responder_id; + u64 target_id; + u64 bus_spec_data; + u8 oem_data[1]; /* Variable length OEM data */ +} sal_log_plat_bus_err_info_t; + +/* Overall platform error section structure */ +typedef union sal_log_platform_err_info { + sal_log_mem_dev_err_info_t mem_dev_err; + sal_log_sel_dev_err_info_t sel_dev_err; + sal_log_pci_bus_err_info_t pci_bus_err; + sal_log_smbios_dev_err_info_t smbios_dev_err; + sal_log_pci_comp_err_info_t pci_comp_err; + sal_log_plat_specific_err_info_t plat_specific_err; + sal_log_host_ctlr_err_info_t host_ctlr_err; + sal_log_plat_bus_err_info_t plat_bus_err; +} sal_log_platform_err_info_t; + +/* SAL log over-all, multi-section error record structure (processor+platform) */ +typedef struct err_rec { + sal_log_record_header_t sal_elog_header; + sal_log_processor_info_t proc_err; + sal_log_platform_err_info_t plat_err; + u8 oem_data_pad[1024]; +} ia64_err_rec_t; + +/* + * Now define a couple of inline functions for improved type checking + * and convenience. + */ + +extern s64 ia64_sal_cache_flush (u64 cache_type); +extern void __init check_sal_cache_flush (void); + +/* Initialize all the processor and platform level instruction and data caches */ +static inline s64 +ia64_sal_cache_init (void) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_CACHE_INIT, 0, 0, 0, 0, 0, 0, 0); + return isrv.status; +} + +/* + * Clear the processor and platform information logged by SAL with respect to the machine + * state at the time of MCA's, INITs, CMCs, or CPEs. + */ +static inline s64 +ia64_sal_clear_state_info (u64 sal_info_type) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SAL_CLEAR_STATE_INFO, sal_info_type, 0, + 0, 0, 0, 0, 0); + return isrv.status; +} + + +/* Get the processor and platform information logged by SAL with respect to the machine + * state at the time of the MCAs, INITs, CMCs, or CPEs. + */ +static inline u64 +ia64_sal_get_state_info (u64 sal_info_type, u64 *sal_info) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO, sal_info_type, 0, + sal_info, 0, 0, 0, 0); + if (isrv.status) + return 0; + + return isrv.v0; +} + +/* + * Get the maximum size of the information logged by SAL with respect to the machine state + * at the time of MCAs, INITs, CMCs, or CPEs. + */ +static inline u64 +ia64_sal_get_state_info_size (u64 sal_info_type) +{ + struct ia64_sal_retval isrv; + SAL_CALL_REENTRANT(isrv, SAL_GET_STATE_INFO_SIZE, sal_info_type, 0, + 0, 0, 0, 0, 0); + if (isrv.status) + return 0; + return isrv.v0; +} + +/* + * Causes the processor to go into a spin loop within SAL where SAL awaits a wakeup from + * the monarch processor. Must not lock, because it will not return on any cpu until the + * monarch processor sends a wake up. + */ +static inline s64 +ia64_sal_mc_rendez (void) +{ + struct ia64_sal_retval isrv; + SAL_CALL_NOLOCK(isrv, SAL_MC_RENDEZ, 0, 0, 0, 0, 0, 0, 0); + return isrv.status; +} + +/* + * Allow the OS to specify the interrupt number to be used by SAL to interrupt OS during + * the machine check rendezvous sequence as well as the mechanism to wake up the + * non-monarch processor at the end of machine check processing. + * Returns the complete ia64_sal_retval because some calls return more than just a status + * value. + */ +static inline struct ia64_sal_retval +ia64_sal_mc_set_params (u64 param_type, u64 i_or_m, u64 i_or_m_val, u64 timeout, u64 rz_always) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_MC_SET_PARAMS, param_type, i_or_m, i_or_m_val, + timeout, rz_always, 0, 0); + return isrv; +} + +/* Read from PCI configuration space */ +static inline s64 +ia64_sal_pci_config_read (u64 pci_config_addr, int type, u64 size, u64 *value) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_PCI_CONFIG_READ, pci_config_addr, size, type, 0, 0, 0, 0); + if (value) + *value = isrv.v0; + return isrv.status; +} + +/* Write to PCI configuration space */ +static inline s64 +ia64_sal_pci_config_write (u64 pci_config_addr, int type, u64 size, u64 value) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_PCI_CONFIG_WRITE, pci_config_addr, size, value, + type, 0, 0, 0); + return isrv.status; +} + +/* + * Register physical addresses of locations needed by SAL when SAL procedures are invoked + * in virtual mode. + */ +static inline s64 +ia64_sal_register_physical_addr (u64 phys_entry, u64 phys_addr) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_REGISTER_PHYSICAL_ADDR, phys_entry, phys_addr, + 0, 0, 0, 0, 0); + return isrv.status; +} + +/* + * Register software dependent code locations within SAL. These locations are handlers or + * entry points where SAL will pass control for the specified event. These event handlers + * are for the bott rendezvous, MCAs and INIT scenarios. + */ +static inline s64 +ia64_sal_set_vectors (u64 vector_type, + u64 handler_addr1, u64 gp1, u64 handler_len1, + u64 handler_addr2, u64 gp2, u64 handler_len2) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_SET_VECTORS, vector_type, + handler_addr1, gp1, handler_len1, + handler_addr2, gp2, handler_len2); + + return isrv.status; +} + +/* Update the contents of PAL block in the non-volatile storage device */ +static inline s64 +ia64_sal_update_pal (u64 param_buf, u64 scratch_buf, u64 scratch_buf_size, + u64 *error_code, u64 *scratch_buf_size_needed) +{ + struct ia64_sal_retval isrv; + SAL_CALL(isrv, SAL_UPDATE_PAL, param_buf, scratch_buf, scratch_buf_size, + 0, 0, 0, 0); + if (error_code) + *error_code = isrv.v0; + if (scratch_buf_size_needed) + *scratch_buf_size_needed = isrv.v1; + return isrv.status; +} + +/* Get physical processor die mapping in the platform. */ +static inline s64 +ia64_sal_physical_id_info(u16 *splid) +{ + struct ia64_sal_retval isrv; + + if (sal_revision < SAL_VERSION_CODE(3,2)) + return -1; + + SAL_CALL(isrv, SAL_PHYSICAL_ID_INFO, 0, 0, 0, 0, 0, 0, 0); + if (splid) + *splid = isrv.v0; + return isrv.status; +} + +extern unsigned long sal_platform_features; + +extern int (*salinfo_platform_oemdata)(const u8 *, u8 **, u64 *); + +struct sal_ret_values { + long r8; long r9; long r10; long r11; +}; + +#define IA64_SAL_OEMFUNC_MIN 0x02000000 +#define IA64_SAL_OEMFUNC_MAX 0x03ffffff + +extern int ia64_sal_oemcall(struct ia64_sal_retval *, u64, u64, u64, u64, u64, + u64, u64, u64); +extern int ia64_sal_oemcall_nolock(struct ia64_sal_retval *, u64, u64, u64, + u64, u64, u64, u64, u64); +extern int ia64_sal_oemcall_reentrant(struct ia64_sal_retval *, u64, u64, u64, + u64, u64, u64, u64, u64); +extern long +ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info); +#ifdef CONFIG_HOTPLUG_CPU +/* + * System Abstraction Layer Specification + * Section 3.2.5.1: OS_BOOT_RENDEZ to SAL return State. + * Note: region regs are stored first in head.S _start. Hence they must + * stay up front. + */ +struct sal_to_os_boot { + u64 rr[8]; /* Region Registers */ + u64 br[6]; /* br0: + * return addr into SAL boot rendez routine */ + u64 gr1; /* SAL:GP */ + u64 gr12; /* SAL:SP */ + u64 gr13; /* SAL: Task Pointer */ + u64 fpsr; + u64 pfs; + u64 rnat; + u64 unat; + u64 bspstore; + u64 dcr; /* Default Control Register */ + u64 iva; + u64 pta; + u64 itv; + u64 pmv; + u64 cmcv; + u64 lrr[2]; + u64 gr[4]; + u64 pr; /* Predicate registers */ + u64 lc; /* Loop Count */ + struct ia64_fpreg fp[20]; +}; + +/* + * Global array allocated for NR_CPUS at boot time + */ +extern struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; + +extern void ia64_jump_to_sal(struct sal_to_os_boot *); +#endif + +extern void ia64_sal_handler_init(void *entry_point, void *gpval); + +#define PALO_MAX_TLB_PURGES 0xFFFF +#define PALO_SIG "PALO" + +struct palo_table { + u8 signature[4]; /* Should be "PALO" */ + u32 length; + u8 minor_revision; + u8 major_revision; + u8 checksum; + u8 reserved1[5]; + u16 max_tlb_purges; + u8 reserved2[6]; +}; + +#define NPTCG_FROM_PAL 0 +#define NPTCG_FROM_PALO 1 +#define NPTCG_FROM_KERNEL_PARAMETER 2 + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_SAL_H */ diff --git a/arch/ia64/include/asm/sections.h b/arch/ia64/include/asm/sections.h new file mode 100644 index 000000000000..8e0875cf6071 --- /dev/null +++ b/arch/ia64/include/asm/sections.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_SECTIONS_H +#define _ASM_IA64_SECTIONS_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + +typedef struct fdesc func_desc_t; + +#include + +extern char __phys_per_cpu_start[]; +#ifdef CONFIG_SMP +extern char __cpu0_per_cpu[]; +#endif +extern char __start___vtop_patchlist[], __end___vtop_patchlist[]; +extern char __start___rse_patchlist[], __end___rse_patchlist[]; +extern char __start___mckinley_e9_bundles[], __end___mckinley_e9_bundles[]; +extern char __start___phys_stack_reg_patchlist[], __end___phys_stack_reg_patchlist[]; +extern char __start_gate_section[]; +extern char __start_gate_mckinley_e9_patchlist[], __end_gate_mckinley_e9_patchlist[]; +extern char __start_gate_vtop_patchlist[], __end_gate_vtop_patchlist[]; +extern char __start_gate_fsyscall_patchlist[], __end_gate_fsyscall_patchlist[]; +extern char __start_gate_brl_fsys_bubble_down_patchlist[], __end_gate_brl_fsys_bubble_down_patchlist[]; +extern char __start_unwind[], __end_unwind[]; +extern char __start_ivt_text[], __end_ivt_text[]; + +#endif /* _ASM_IA64_SECTIONS_H */ diff --git a/arch/ia64/include/asm/serial.h b/arch/ia64/include/asm/serial.h new file mode 100644 index 000000000000..068be11583df --- /dev/null +++ b/arch/ia64/include/asm/serial.h @@ -0,0 +1,17 @@ +/* + * Derived from the i386 version. + */ + +/* + * This assumes you have a 1.8432 MHz clock for your UART. + * + * It'd be nice if someone built a serial card with a 24.576 MHz + * clock, since the 16550A is capable of handling a top speed of 1.5 + * megabits/second; but this requires the faster clock. + */ +#define BASE_BAUD ( 1843200 / 16 ) + +/* + * All legacy serial ports should be enumerated via ACPI namespace, so + * we need not list them here. + */ diff --git a/arch/ia64/include/asm/shmparam.h b/arch/ia64/include/asm/shmparam.h new file mode 100644 index 000000000000..43bd8324ab71 --- /dev/null +++ b/arch/ia64/include/asm/shmparam.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_SHMPARAM_H +#define _ASM_IA64_SHMPARAM_H + +/* + * SHMLBA controls minimum alignment at which shared memory segments + * get attached. The IA-64 architecture says that there may be a + * performance degradation when there are virtual aliases within 1MB. + * To reduce the chance of this, we set SHMLBA to 1MB. --davidm 00/12/20 + */ +#define SHMLBA (1024*1024) + +#endif /* _ASM_IA64_SHMPARAM_H */ diff --git a/arch/ia64/include/asm/signal.h b/arch/ia64/include/asm/signal.h new file mode 100644 index 000000000000..80f067f9b3ce --- /dev/null +++ b/arch/ia64/include/asm/signal.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Modified 1998-2001, 2003 + * David Mosberger-Tang , Hewlett-Packard Co + * + * Unfortunately, this file is being included by bits/signal.h in + * glibc-2.x. Hence the #ifdef __KERNEL__ ugliness. + */ +#ifndef _ASM_IA64_SIGNAL_H +#define _ASM_IA64_SIGNAL_H + +#include + + +#define _NSIG 64 +#define _NSIG_BPW 64 +#define _NSIG_WORDS (_NSIG / _NSIG_BPW) + +# ifndef __ASSEMBLY__ + +/* Most things should be clean enough to redefine this at will, if care + is taken to make libc match. */ + +typedef unsigned long old_sigset_t; + +typedef struct { + unsigned long sig[_NSIG_WORDS]; +} sigset_t; + +# include + +# endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_SIGNAL_H */ diff --git a/arch/ia64/include/asm/smp.h b/arch/ia64/include/asm/smp.h new file mode 100644 index 000000000000..aa92234c0142 --- /dev/null +++ b/arch/ia64/include/asm/smp.h @@ -0,0 +1,103 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * SMP Support + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * (c) Copyright 2001-2003, 2005 Hewlett-Packard Development Company, L.P. + * David Mosberger-Tang + * Bjorn Helgaas + */ +#ifndef _ASM_IA64_SMP_H +#define _ASM_IA64_SMP_H + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static inline unsigned int +ia64_get_lid (void) +{ + union { + struct { + unsigned long reserved : 16; + unsigned long eid : 8; + unsigned long id : 8; + unsigned long ignored : 32; + } f; + unsigned long bits; + } lid; + + lid.bits = ia64_getreg(_IA64_REG_CR_LID); + return lid.f.id << 8 | lid.f.eid; +} + +#define hard_smp_processor_id() ia64_get_lid() + +#ifdef CONFIG_SMP + +#define raw_smp_processor_id() (current_thread_info()->cpu) + +extern struct smp_boot_data { + int cpu_count; + int cpu_phys_id[NR_CPUS]; +} smp_boot_data __initdata; + +extern char no_int_routing; + +extern cpumask_t cpu_core_map[NR_CPUS]; +DECLARE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); +extern int smp_num_siblings; +extern void __iomem *ipi_base_addr; + +extern volatile int ia64_cpu_to_sapicid[]; +#define cpu_physical_id(i) ia64_cpu_to_sapicid[i] + +extern unsigned long ap_wakeup_vector; + +/* + * Function to map hard smp processor id to logical id. Slow, so don't use this in + * performance-critical code. + */ +static inline int +cpu_logical_id (int cpuid) +{ + int i; + + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(i) == cpuid) + break; + return i; +} + +/* Upping and downing of CPUs */ +extern int __cpu_disable (void); +extern void __cpu_die (unsigned int cpu); +extern void cpu_die (void) __attribute__ ((noreturn)); +extern void __init smp_build_cpu_map(void); + +extern void __init init_smp_config (void); +extern void smp_do_timer (struct pt_regs *regs); + +extern irqreturn_t handle_IPI(int irq, void *dev_id); +extern void smp_send_reschedule (int cpu); +extern void identify_siblings (struct cpuinfo_ia64 *); +extern int is_multithreading_enabled(void); + +extern void arch_send_call_function_single_ipi(int cpu); +extern void arch_send_call_function_ipi_mask(const struct cpumask *mask); + +#else /* CONFIG_SMP */ + +#define cpu_logical_id(i) 0 +#define cpu_physical_id(i) ia64_get_lid() + +#endif /* CONFIG_SMP */ +#endif /* _ASM_IA64_SMP_H */ diff --git a/arch/ia64/include/asm/sn/intr.h b/arch/ia64/include/asm/sn/intr.h new file mode 100644 index 000000000000..3885a77b21df --- /dev/null +++ b/arch/ia64/include/asm/sn/intr.h @@ -0,0 +1,15 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1992 - 1997, 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_SN_INTR_H +#define _ASM_IA64_SN_INTR_H + +#define SGI_XPC_ACTIVATE 0x30 +#define SGI_XPC_NOTIFY 0xe7 + +#endif /* _ASM_IA64_SN_INTR_H */ diff --git a/arch/ia64/include/asm/sn/sn_sal.h b/arch/ia64/include/asm/sn/sn_sal.h new file mode 100644 index 000000000000..d437aa43343b --- /dev/null +++ b/arch/ia64/include/asm/sn/sn_sal.h @@ -0,0 +1,124 @@ +#ifndef _ASM_IA64_SN_SN_SAL_H +#define _ASM_IA64_SN_SN_SAL_H + +/* + * System Abstraction Layer definitions for IA64 + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (c) 2000-2006 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include + +// SGI Specific Calls +#define SN_SAL_GET_PARTITION_ADDR 0x02000009 +#define SN_SAL_MEMPROTECT 0x0200003e + +#define SN_SAL_WATCHLIST_ALLOC 0x02000070 +#define SN_SAL_WATCHLIST_FREE 0x02000071 + +/* + * SAL Error Codes + */ +#define SALRET_MORE_PASSES 1 +#define SALRET_OK 0 +#define SALRET_NOT_IMPLEMENTED (-1) +#define SALRET_INVALID_ARG (-2) +#define SALRET_ERROR (-3) + +/* + * Returns the physical address of the partition's reserved page through + * an iterative number of calls. + * + * On first call, 'cookie' and 'len' should be set to 0, and 'addr' + * set to the nasid of the partition whose reserved page's address is + * being sought. + * On subsequent calls, pass the values, that were passed back on the + * previous call. + * + * While the return status equals SALRET_MORE_PASSES, keep calling + * this function after first copying 'len' bytes starting at 'addr' + * into 'buf'. Once the return status equals SALRET_OK, 'addr' will + * be the physical address of the partition's reserved page. If the + * return status equals neither of these, an error as occurred. + */ +static inline s64 +sn_partition_reserved_page_pa(u64 buf, u64 *cookie, u64 *addr, u64 *len) +{ + struct ia64_sal_retval rv; + ia64_sal_oemcall_reentrant(&rv, SN_SAL_GET_PARTITION_ADDR, *cookie, + *addr, buf, *len, 0, 0, 0); + *cookie = rv.v0; + *addr = rv.v1; + *len = rv.v2; + return rv.status; +} + +/* + * Change memory access protections for a physical address range. + * nasid_array is not used on Altix, but may be in future architectures. + * Available memory protection access classes are defined after the function. + */ +static inline int +sn_change_memprotect(u64 paddr, u64 len, u64 perms, u64 *nasid_array) +{ + struct ia64_sal_retval ret_stuff; + + ia64_sal_oemcall_nolock(&ret_stuff, SN_SAL_MEMPROTECT, paddr, len, + (u64)nasid_array, perms, 0, 0, 0); + return ret_stuff.status; +} +#define SN_MEMPROT_ACCESS_CLASS_0 0x14a080 +#define SN_MEMPROT_ACCESS_CLASS_1 0x2520c2 +#define SN_MEMPROT_ACCESS_CLASS_2 0x14a1ca +#define SN_MEMPROT_ACCESS_CLASS_3 0x14a290 +#define SN_MEMPROT_ACCESS_CLASS_6 0x084080 +#define SN_MEMPROT_ACCESS_CLASS_7 0x021080 + +union sn_watchlist_u { + u64 val; + struct { + u64 blade : 16, + size : 32, + filler : 16; + }; +}; + +static inline int +sn_mq_watchlist_alloc(int blade, void *mq, unsigned int mq_size, + unsigned long *intr_mmr_offset) +{ + struct ia64_sal_retval rv; + unsigned long addr; + union sn_watchlist_u size_blade; + int watchlist; + + addr = (unsigned long)mq; + size_blade.size = mq_size; + size_blade.blade = blade; + + /* + * bios returns watchlist number or negative error number. + */ + ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_ALLOC, addr, + size_blade.val, (u64)intr_mmr_offset, + (u64)&watchlist, 0, 0, 0); + if (rv.status < 0) + return rv.status; + + return watchlist; +} + +static inline int +sn_mq_watchlist_free(int blade, int watchlist_num) +{ + struct ia64_sal_retval rv; + ia64_sal_oemcall_nolock(&rv, SN_SAL_WATCHLIST_FREE, blade, + watchlist_num, 0, 0, 0, 0, 0); + return rv.status; +} +#endif /* _ASM_IA64_SN_SN_SAL_H */ diff --git a/arch/ia64/include/asm/sparsemem.h b/arch/ia64/include/asm/sparsemem.h new file mode 100644 index 000000000000..a58f8b466d96 --- /dev/null +++ b/arch/ia64/include/asm/sparsemem.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_SPARSEMEM_H +#define _ASM_IA64_SPARSEMEM_H + +#ifdef CONFIG_SPARSEMEM +#include +/* + * SECTION_SIZE_BITS 2^N: how big each section will be + * MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space + */ + +#define SECTION_SIZE_BITS (30) +#define MAX_PHYSMEM_BITS (50) +#ifdef CONFIG_ARCH_FORCE_MAX_ORDER +#if (CONFIG_ARCH_FORCE_MAX_ORDER + PAGE_SHIFT > SECTION_SIZE_BITS) +#undef SECTION_SIZE_BITS +#define SECTION_SIZE_BITS (CONFIG_ARCH_FORCE_MAX_ORDER + PAGE_SHIFT) +#endif +#endif + +#endif /* CONFIG_SPARSEMEM */ + +#ifdef CONFIG_MEMORY_HOTPLUG +int memory_add_physaddr_to_nid(u64 addr); +#define memory_add_physaddr_to_nid memory_add_physaddr_to_nid +#endif + +#endif /* _ASM_IA64_SPARSEMEM_H */ diff --git a/arch/ia64/include/asm/spinlock.h b/arch/ia64/include/asm/spinlock.h new file mode 100644 index 000000000000..0e5c1ad3239c --- /dev/null +++ b/arch/ia64/include/asm/spinlock.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_SPINLOCK_H +#define _ASM_IA64_SPINLOCK_H + +/* + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Walt Drummond + * + * This file is used for SMP configurations only. + */ + +#include +#include +#include + +#include +#include +#include +#include + +#define arch_spin_lock_init(x) ((x)->lock = 0) + +/* + * Ticket locks are conceptually two parts, one indicating the current head of + * the queue, and the other indicating the current tail. The lock is acquired + * by atomically noting the tail and incrementing it by one (thus adding + * ourself to the queue and noting our position), then waiting until the head + * becomes equal to the initial value of the tail. + * The pad bits in the middle are used to prevent the next_ticket number + * overflowing into the now_serving number. + * + * 31 17 16 15 14 0 + * +----------------------------------------------------+ + * | now_serving | padding | next_ticket | + * +----------------------------------------------------+ + */ + +#define TICKET_SHIFT 17 +#define TICKET_BITS 15 +#define TICKET_MASK ((1 << TICKET_BITS) - 1) + +static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock) +{ + int *p = (int *)&lock->lock, ticket, serve; + + ticket = ia64_fetchadd(1, p, acq); + + if (!(((ticket >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; + + ia64_invala(); + + for (;;) { + asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(p) : "memory"); + + if (!(((serve >> TICKET_SHIFT) ^ ticket) & TICKET_MASK)) + return; + cpu_relax(); + } +} + +static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock) +{ + int tmp = READ_ONCE(lock->lock); + + if (!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK)) + return ia64_cmpxchg(acq, &lock->lock, tmp, tmp + 1, sizeof (tmp)) == tmp; + return 0; +} + +static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock) +{ + unsigned short *p = (unsigned short *)&lock->lock + 1, tmp; + + /* This could be optimised with ARCH_HAS_MMIOWB */ + mmiowb(); + asm volatile ("ld2.bias %0=[%1]" : "=r"(tmp) : "r"(p)); + WRITE_ONCE(*p, (tmp + 2) & ~1); +} + +static inline int __ticket_spin_is_locked(arch_spinlock_t *lock) +{ + long tmp = READ_ONCE(lock->lock); + + return !!(((tmp >> TICKET_SHIFT) ^ tmp) & TICKET_MASK); +} + +static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) +{ + long tmp = READ_ONCE(lock->lock); + + return ((tmp - (tmp >> TICKET_SHIFT)) & TICKET_MASK) > 1; +} + +static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock) +{ + return !(((lock.lock >> TICKET_SHIFT) ^ lock.lock) & TICKET_MASK); +} + +static inline int arch_spin_is_locked(arch_spinlock_t *lock) +{ + return __ticket_spin_is_locked(lock); +} + +static inline int arch_spin_is_contended(arch_spinlock_t *lock) +{ + return __ticket_spin_is_contended(lock); +} +#define arch_spin_is_contended arch_spin_is_contended + +static __always_inline void arch_spin_lock(arch_spinlock_t *lock) +{ + __ticket_spin_lock(lock); +} + +static __always_inline int arch_spin_trylock(arch_spinlock_t *lock) +{ + return __ticket_spin_trylock(lock); +} + +static __always_inline void arch_spin_unlock(arch_spinlock_t *lock) +{ + __ticket_spin_unlock(lock); +} + +#ifdef ASM_SUPPORTED + +static __always_inline void +arch_read_lock(arch_rwlock_t *lock) +{ + unsigned long flags = 0; + + __asm__ __volatile__ ( + "tbit.nz p6, p0 = %1,%2\n" + "br.few 3f\n" + "1:\n" + "fetchadd4.rel r2 = [%0], -1;;\n" + "(p6) ssm psr.i\n" + "2:\n" + "hint @pause\n" + "ld4 r2 = [%0];;\n" + "cmp4.lt p7,p0 = r2, r0\n" + "(p7) br.cond.spnt.few 2b\n" + "(p6) rsm psr.i\n" + ";;\n" + "3:\n" + "fetchadd4.acq r2 = [%0], 1;;\n" + "cmp4.lt p7,p0 = r2, r0\n" + "(p7) br.cond.spnt.few 1b\n" + : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT) + : "p6", "p7", "r2", "memory"); +} + +#else /* !ASM_SUPPORTED */ + +#define arch_read_lock(rw) \ +do { \ + arch_rwlock_t *__read_lock_ptr = (rw); \ + \ + while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \ + ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ + while (*(volatile int *)__read_lock_ptr < 0) \ + cpu_relax(); \ + } \ +} while (0) + +#endif /* !ASM_SUPPORTED */ + +#define arch_read_unlock(rw) \ +do { \ + arch_rwlock_t *__read_lock_ptr = (rw); \ + ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \ +} while (0) + +#ifdef ASM_SUPPORTED + +static __always_inline void +arch_write_lock(arch_rwlock_t *lock) +{ + unsigned long flags = 0; + + __asm__ __volatile__ ( + "tbit.nz p6, p0 = %1, %2\n" + "mov ar.ccv = r0\n" + "dep r29 = -1, r0, 31, 1\n" + "br.few 3f;;\n" + "1:\n" + "(p6) ssm psr.i\n" + "2:\n" + "hint @pause\n" + "ld4 r2 = [%0];;\n" + "cmp4.eq p0,p7 = r0, r2\n" + "(p7) br.cond.spnt.few 2b\n" + "(p6) rsm psr.i\n" + ";;\n" + "3:\n" + "cmpxchg4.acq r2 = [%0], r29, ar.ccv;;\n" + "cmp4.eq p0,p7 = r0, r2\n" + "(p7) br.cond.spnt.few 1b;;\n" + : : "r"(lock), "r"(flags), "i"(IA64_PSR_I_BIT) + : "ar.ccv", "p6", "p7", "r2", "r29", "memory"); +} + +#define arch_write_trylock(rw) \ +({ \ + register long result; \ + \ + __asm__ __volatile__ ( \ + "mov ar.ccv = r0\n" \ + "dep r29 = -1, r0, 31, 1;;\n" \ + "cmpxchg4.acq %0 = [%1], r29, ar.ccv\n" \ + : "=r"(result) : "r"(rw) : "ar.ccv", "r29", "memory"); \ + (result == 0); \ +}) + +static inline void arch_write_unlock(arch_rwlock_t *x) +{ + u8 *y = (u8 *)x; + barrier(); + asm volatile ("st1.rel.nta [%0] = r0\n\t" :: "r"(y+3) : "memory" ); +} + +#else /* !ASM_SUPPORTED */ + +#define arch_write_lock(l) \ +({ \ + __u64 ia64_val, ia64_set_val = ia64_dep_mi(-1, 0, 31, 1); \ + __u32 *ia64_write_lock_ptr = (__u32 *) (l); \ + do { \ + while (*ia64_write_lock_ptr) \ + ia64_barrier(); \ + ia64_val = ia64_cmpxchg4_acq(ia64_write_lock_ptr, ia64_set_val, 0); \ + } while (ia64_val); \ +}) + +#define arch_write_trylock(rw) \ +({ \ + __u64 ia64_val; \ + __u64 ia64_set_val = ia64_dep_mi(-1, 0, 31,1); \ + ia64_val = ia64_cmpxchg4_acq((__u32 *)(rw), ia64_set_val, 0); \ + (ia64_val == 0); \ +}) + +static inline void arch_write_unlock(arch_rwlock_t *x) +{ + barrier(); + x->write_lock = 0; +} + +#endif /* !ASM_SUPPORTED */ + +static inline int arch_read_trylock(arch_rwlock_t *x) +{ + union { + arch_rwlock_t lock; + __u32 word; + } old, new; + old.lock = new.lock = *x; + old.lock.write_lock = new.lock.write_lock = 0; + ++new.lock.read_counter; + return (u32)ia64_cmpxchg4_acq((__u32 *)(x), new.word, old.word) == old.word; +} + +#endif /* _ASM_IA64_SPINLOCK_H */ diff --git a/arch/ia64/include/asm/spinlock_types.h b/arch/ia64/include/asm/spinlock_types.h new file mode 100644 index 000000000000..14b8a161c165 --- /dev/null +++ b/arch/ia64/include/asm/spinlock_types.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_SPINLOCK_TYPES_H +#define _ASM_IA64_SPINLOCK_TYPES_H + +#ifndef __LINUX_SPINLOCK_TYPES_RAW_H +# error "please don't include this file directly" +#endif + +typedef struct { + volatile unsigned int lock; +} arch_spinlock_t; + +#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } + +typedef struct { + volatile unsigned int read_counter : 31; + volatile unsigned int write_lock : 1; +} arch_rwlock_t; + +#define __ARCH_RW_LOCK_UNLOCKED { 0, 0 } + +#endif diff --git a/arch/ia64/include/asm/string.h b/arch/ia64/include/asm/string.h new file mode 100644 index 000000000000..8b84df0dbfad --- /dev/null +++ b/arch/ia64/include/asm/string.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_STRING_H +#define _ASM_IA64_STRING_H + +/* + * Here is where we want to put optimized versions of the string + * routines. + * + * Copyright (C) 1998-2000, 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#define __HAVE_ARCH_STRLEN 1 /* see arch/ia64/lib/strlen.S */ +#define __HAVE_ARCH_MEMSET 1 /* see arch/ia64/lib/memset.S */ +#define __HAVE_ARCH_MEMCPY 1 /* see arch/ia64/lib/memcpy.S */ + +extern __kernel_size_t strlen (const char *); +extern void *memcpy (void *, const void *, __kernel_size_t); +extern void *memset (void *, int, __kernel_size_t); + +#endif /* _ASM_IA64_STRING_H */ diff --git a/arch/ia64/include/asm/switch_to.h b/arch/ia64/include/asm/switch_to.h new file mode 100644 index 000000000000..a5a4e09468fa --- /dev/null +++ b/arch/ia64/include/asm/switch_to.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Low-level task switching. This is based on information published in + * the Processor Abstraction Layer and the System Abstraction Layer + * manual. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ +#ifndef _ASM_IA64_SWITCH_TO_H +#define _ASM_IA64_SWITCH_TO_H + +#include + +struct task_struct; + +/* + * Context switch from one thread to another. If the two threads have + * different address spaces, schedule() has already taken care of + * switching to the new address space by calling switch_mm(). + * + * Disabling access to the fph partition and the debug-register + * context switch MUST be done before calling ia64_switch_to() since a + * newly created thread returns directly to + * ia64_ret_from_syscall_clear_r8. + */ +extern struct task_struct *ia64_switch_to (void *next_task); + +extern void ia64_save_extra (struct task_struct *task); +extern void ia64_load_extra (struct task_struct *task); + +#define IA64_HAS_EXTRA_STATE(t) \ + ((t)->thread.flags & (IA64_THREAD_DBG_VALID|IA64_THREAD_PM_VALID)) + +#define __switch_to(prev,next,last) do { \ + if (IA64_HAS_EXTRA_STATE(prev)) \ + ia64_save_extra(prev); \ + if (IA64_HAS_EXTRA_STATE(next)) \ + ia64_load_extra(next); \ + ia64_psr(task_pt_regs(next))->dfh = !ia64_is_local_fpu_owner(next); \ + (last) = ia64_switch_to((next)); \ +} while (0) + +#ifdef CONFIG_SMP +/* + * In the SMP case, we save the fph state when context-switching away from a thread that + * modified fph. This way, when the thread gets scheduled on another CPU, the CPU can + * pick up the state from task->thread.fph, avoiding the complication of having to fetch + * the latest fph state from another CPU. In other words: eager save, lazy restore. + */ +# define switch_to(prev,next,last) do { \ + if (ia64_psr(task_pt_regs(prev))->mfh && ia64_is_local_fpu_owner(prev)) { \ + ia64_psr(task_pt_regs(prev))->mfh = 0; \ + (prev)->thread.flags |= IA64_THREAD_FPH_VALID; \ + __ia64_save_fpu((prev)->thread.fph); \ + } \ + __switch_to(prev, next, last); \ + /* "next" in old context is "current" in new context */ \ + if (unlikely((current->thread.flags & IA64_THREAD_MIGRATION) && \ + (task_cpu(current) != \ + task_thread_info(current)->last_cpu))) { \ + task_thread_info(current)->last_cpu = task_cpu(current); \ + } \ +} while (0) +#else +# define switch_to(prev,next,last) __switch_to(prev, next, last) +#endif + +#endif /* _ASM_IA64_SWITCH_TO_H */ diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h new file mode 100644 index 000000000000..2b02a3fb862a --- /dev/null +++ b/arch/ia64/include/asm/syscall.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Access to user system call parameters and results + * + * Copyright (C) 2008 Intel Corp. Shaohua Li + * + * See asm-generic/syscall.h for descriptions of what we must do here. + */ + +#ifndef _ASM_SYSCALL_H +#define _ASM_SYSCALL_H 1 + +#include +#include +#include + +static inline long syscall_get_nr(struct task_struct *task, + struct pt_regs *regs) +{ + if ((long)regs->cr_ifs < 0) /* Not a syscall */ + return -1; + + return regs->r15; +} + +static inline void syscall_rollback(struct task_struct *task, + struct pt_regs *regs) +{ + /* do nothing */ +} + +static inline long syscall_get_error(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r10 == -1 ? -regs->r8:0; +} + +static inline long syscall_get_return_value(struct task_struct *task, + struct pt_regs *regs) +{ + return regs->r8; +} + +static inline void syscall_set_return_value(struct task_struct *task, + struct pt_regs *regs, + int error, long val) +{ + if (error) { + /* error < 0, but ia64 uses > 0 return value */ + regs->r8 = -error; + regs->r10 = -1; + } else { + regs->r8 = val; + regs->r10 = 0; + } +} + +extern void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned long *args); + +static inline int syscall_get_arch(struct task_struct *task) +{ + return AUDIT_ARCH_IA64; +} +#endif /* _ASM_SYSCALL_H */ diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h new file mode 100644 index 000000000000..21b257117e0a --- /dev/null +++ b/arch/ia64/include/asm/thread_info.h @@ -0,0 +1,131 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _ASM_IA64_THREAD_INFO_H +#define _ASM_IA64_THREAD_INFO_H + +#ifndef ASM_OFFSETS_C +#include +#endif +#include +#include + +#define THREAD_SIZE KERNEL_STACK_SIZE + +#ifndef __ASSEMBLY__ + +/* + * On IA-64, we want to keep the task structure and kernel stack together, so they can be + * mapped by a single TLB entry and so they can be addressed by the "current" pointer + * without having to do pointer masking. + */ +struct thread_info { + struct task_struct *task; /* XXX not really needed, except for dup_task_struct() */ + __u32 flags; /* thread_info flags (see TIF_*) */ + __u32 cpu; /* current CPU */ + __u32 last_cpu; /* Last CPU thread ran on */ + __u32 status; /* Thread synchronous flags */ + int preempt_count; /* 0=premptable, <0=BUG; will also serve as bh-counter */ +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + __u64 utime; + __u64 stime; + __u64 gtime; + __u64 hardirq_time; + __u64 softirq_time; + __u64 idle_time; + __u64 ac_stamp; + __u64 ac_leave; + __u64 ac_stime; + __u64 ac_utime; +#endif +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .task = &tsk, \ + .flags = 0, \ + .cpu = 0, \ + .preempt_count = INIT_PREEMPT_COUNT, \ +} + +#ifndef ASM_OFFSETS_C +/* how to get the thread information struct from C */ +#define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) +#define arch_alloc_thread_stack_node(tsk, node) \ + ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) +#define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) +#else +#define current_thread_info() ((struct thread_info *) 0) +#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) +#define task_thread_info(tsk) ((struct thread_info *) 0) +#endif +#define arch_free_thread_stack(tsk) /* nothing */ +#define task_stack_page(tsk) ((void *)(tsk)) + +#define __HAVE_THREAD_FUNCTIONS +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +#define setup_thread_stack(p, org) \ + *task_thread_info(p) = *task_thread_info(org); \ + task_thread_info(p)->ac_stime = 0; \ + task_thread_info(p)->ac_utime = 0; \ + task_thread_info(p)->task = (p); +#else +#define setup_thread_stack(p, org) \ + *task_thread_info(p) = *task_thread_info(org); \ + task_thread_info(p)->task = (p); +#endif +#define end_of_stack(p) (unsigned long *)((void *)(p) + IA64_RBS_OFFSET) + +#define alloc_task_struct_node(node) \ +({ \ + struct page *page = alloc_pages_node(node, GFP_KERNEL | __GFP_COMP, \ + KERNEL_STACK_SIZE_ORDER); \ + struct task_struct *ret = page ? page_address(page) : NULL; \ + \ + ret; \ +}) +#define free_task_struct(tsk) free_pages((unsigned long) (tsk), KERNEL_STACK_SIZE_ORDER) + +#endif /* !__ASSEMBLY */ + +/* + * thread information flags + * - these are process state flags that various assembly files may need to access + * - pending work-to-be-done flags are in least-significant 16 bits, other flags + * in top 16 bits + */ +#define TIF_SIGPENDING 0 /* signal pending */ +#define TIF_NEED_RESCHED 1 /* rescheduling necessary */ +#define TIF_SYSCALL_TRACE 2 /* syscall trace active */ +#define TIF_SYSCALL_AUDIT 3 /* syscall auditing active */ +#define TIF_SINGLESTEP 4 /* restore singlestep on return to user mode */ +#define TIF_NOTIFY_SIGNAL 5 /* signal notification exist */ +#define TIF_NOTIFY_RESUME 6 /* resumption notification requested */ +#define TIF_MEMDIE 17 /* is terminating due to OOM killer */ +#define TIF_MCA_INIT 18 /* this task is processing MCA or INIT */ +#define TIF_DB_DISABLED 19 /* debug trap disabled for fsyscall */ +#define TIF_RESTORE_RSE 21 /* user RBS is newer than kernel RBS */ +#define TIF_POLLING_NRFLAG 22 /* idle is polling for TIF_NEED_RESCHED */ + +#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) +#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) +#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SYSCALL_TRACEAUDIT (_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP) +#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) +#define _TIF_NOTIFY_SIGNAL (1 << TIF_NOTIFY_SIGNAL) +#define _TIF_SIGPENDING (1 << TIF_SIGPENDING) +#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) +#define _TIF_MCA_INIT (1 << TIF_MCA_INIT) +#define _TIF_DB_DISABLED (1 << TIF_DB_DISABLED) +#define _TIF_RESTORE_RSE (1 << TIF_RESTORE_RSE) +#define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) + +/* "work to do on user-return" bits */ +#define TIF_ALLWORK_MASK (_TIF_SIGPENDING|_TIF_NOTIFY_RESUME|_TIF_SYSCALL_AUDIT|\ + _TIF_NEED_RESCHED|_TIF_SYSCALL_TRACE|_TIF_NOTIFY_SIGNAL) +/* like TIF_ALLWORK_BITS but sans TIF_SYSCALL_TRACE or TIF_SYSCALL_AUDIT */ +#define TIF_WORK_MASK (TIF_ALLWORK_MASK&~(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT)) + +#endif /* _ASM_IA64_THREAD_INFO_H */ diff --git a/arch/ia64/include/asm/timex.h b/arch/ia64/include/asm/timex.h new file mode 100644 index 000000000000..7ccc077a60be --- /dev/null +++ b/arch/ia64/include/asm/timex.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_TIMEX_H +#define _ASM_IA64_TIMEX_H + +/* + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * 2001/01/18 davidm Removed CLOCK_TICK_RATE. It makes no sense on IA-64. + * Also removed cacheflush_time as it's entirely unused. + */ + +#include +#include + +typedef unsigned long cycles_t; + +extern void (*ia64_udelay)(unsigned long usecs); + +/* + * For performance reasons, we don't want to define CLOCK_TICK_TRATE as + * local_cpu_data->itc_rate. Fortunately, we don't have to, either: according to George + * Anzinger, 1/CLOCK_TICK_RATE is taken as the resolution of the timer clock. The time + * calculation assumes that you will use enough of these so that your tick size <= 1/HZ. + * If the calculation shows that your CLOCK_TICK_RATE can not supply exactly 1/HZ ticks, + * the actual value is calculated and used to update the wall clock each jiffie. Setting + * the CLOCK_TICK_RATE to x*HZ insures that the calculation will find no errors. Hence we + * pick a multiple of HZ which gives us a (totally virtual) CLOCK_TICK_RATE of about + * 100MHz. + */ +#define CLOCK_TICK_RATE (HZ * 100000UL) + +static inline cycles_t +get_cycles (void) +{ + cycles_t ret; + + ret = ia64_getreg(_IA64_REG_AR_ITC); + return ret; +} +#define get_cycles get_cycles + +extern void ia64_cpu_local_tick (void); +extern unsigned long long ia64_native_sched_clock (void); + +#endif /* _ASM_IA64_TIMEX_H */ diff --git a/arch/ia64/include/asm/tlb.h b/arch/ia64/include/asm/tlb.h new file mode 100644 index 000000000000..a15fe0809aae --- /dev/null +++ b/arch/ia64/include/asm/tlb.h @@ -0,0 +1,50 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_TLB_H +#define _ASM_IA64_TLB_H +/* + * Based on . + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * Removing a translation from a page table (including TLB-shootdown) is a four-step + * procedure: + * + * (1) Flush (virtual) caches --- ensures virtual memory is coherent with kernel memory + * (this is a no-op on ia64). + * (2) Clear the relevant portions of the page-table + * (3) Flush the TLBs --- ensures that stale content is gone from CPU TLBs + * (4) Release the pages that were freed up in step (2). + * + * Note that the ordering of these steps is crucial to avoid races on MP machines. + * + * The Linux kernel defines several platform-specific hooks for TLB-shootdown. When + * unmapping a portion of the virtual address space, these hooks are called according to + * the following template: + * + * tlb <- tlb_gather_mmu(mm); // start unmap for address space MM + * { + * for each vma that needs a shootdown do { + * tlb_start_vma(tlb, vma); + * for each page-table-entry PTE that needs to be removed do { + * tlb_remove_tlb_entry(tlb, pte, address); + * if (pte refers to a normal page) { + * tlb_remove_page(tlb, page); + * } + * } + * tlb_end_vma(tlb, vma); + * } + * } + * tlb_finish_mmu(tlb); // finish unmap for address space MM + */ +#include +#include +#include + +#include +#include + +#include + +#endif /* _ASM_IA64_TLB_H */ diff --git a/arch/ia64/include/asm/tlbflush.h b/arch/ia64/include/asm/tlbflush.h new file mode 100644 index 000000000000..ceac10c4d6e2 --- /dev/null +++ b/arch/ia64/include/asm/tlbflush.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_TLBFLUSH_H +#define _ASM_IA64_TLBFLUSH_H + +/* + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include + +#include +#include +#include + +struct ia64_tr_entry { + u64 ifa; + u64 itir; + u64 pte; + u64 rr; +}; /*Record for tr entry!*/ + +extern int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size); +extern void ia64_ptr_entry(u64 target_mask, int slot); +extern struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; + +/* + region register macros +*/ +#define RR_TO_VE(val) (((val) >> 0) & 0x0000000000000001) +#define RR_VE(val) (((val) & 0x0000000000000001) << 0) +#define RR_VE_MASK 0x0000000000000001L +#define RR_VE_SHIFT 0 +#define RR_TO_PS(val) (((val) >> 2) & 0x000000000000003f) +#define RR_PS(val) (((val) & 0x000000000000003f) << 2) +#define RR_PS_MASK 0x00000000000000fcL +#define RR_PS_SHIFT 2 +#define RR_RID_MASK 0x00000000ffffff00L +#define RR_TO_RID(val) ((val >> 8) & 0xffffff) + +/* + * Now for some TLB flushing routines. This is the kind of stuff that + * can be very expensive, so try to avoid them whenever possible. + */ +extern void setup_ptcg_sem(int max_purges, int from_palo); + +/* + * Flush everything (kernel mapping may also have changed due to + * vmalloc/vfree). + */ +extern void local_flush_tlb_all (void); + +#ifdef CONFIG_SMP + extern void smp_flush_tlb_all (void); + extern void smp_flush_tlb_mm (struct mm_struct *mm); + extern void smp_flush_tlb_cpumask (cpumask_t xcpumask); +# define flush_tlb_all() smp_flush_tlb_all() +#else +# define flush_tlb_all() local_flush_tlb_all() +# define smp_flush_tlb_cpumask(m) local_flush_tlb_all() +#endif + +static inline void +local_finish_flush_tlb_mm (struct mm_struct *mm) +{ + if (mm == current->active_mm) + activate_context(mm); +} + +/* + * Flush a specified user mapping. This is called, e.g., as a result of fork() and + * exit(). fork() ends up here because the copy-on-write mechanism needs to write-protect + * the PTEs of the parent task. + */ +static inline void +flush_tlb_mm (struct mm_struct *mm) +{ + if (!mm) + return; + + set_bit(mm->context, ia64_ctx.flushmap); + mm->context = 0; + + if (atomic_read(&mm->mm_users) == 0) + return; /* happens as a result of exit_mmap() */ + +#ifdef CONFIG_SMP + smp_flush_tlb_mm(mm); +#else + local_finish_flush_tlb_mm(mm); +#endif +} + +extern void flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end); + +/* + * Page-granular tlb flush. + */ +static inline void +flush_tlb_page (struct vm_area_struct *vma, unsigned long addr) +{ +#ifdef CONFIG_SMP + flush_tlb_range(vma, (addr & PAGE_MASK), (addr & PAGE_MASK) + PAGE_SIZE); +#else + if (vma->vm_mm == current->active_mm) + ia64_ptcl(addr, (PAGE_SHIFT << 2)); + else + vma->vm_mm->context = 0; +#endif +} + +/* + * Flush the local TLB. Invoked from another cpu using an IPI. + */ +#ifdef CONFIG_SMP +void smp_local_flush_tlb(void); +#else +#define smp_local_flush_tlb() +#endif + +static inline void flush_tlb_kernel_range(unsigned long start, + unsigned long end) +{ + flush_tlb_all(); /* XXX fix me */ +} + +#endif /* _ASM_IA64_TLBFLUSH_H */ diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h new file mode 100644 index 000000000000..43567240b0d6 --- /dev/null +++ b/arch/ia64/include/asm/topology.h @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (C) 2002, Erich Focht, NEC + * + * All rights reserved. + */ +#ifndef _ASM_IA64_TOPOLOGY_H +#define _ASM_IA64_TOPOLOGY_H + +#include +#include +#include + +#ifdef CONFIG_NUMA + +/* Nodes w/o CPUs are preferred for memory allocations, see build_zonelists */ +#define PENALTY_FOR_NODE_WITH_CPUS 255 + +/* + * Nodes within this distance are eligible for reclaim by zone_reclaim() when + * zone_reclaim_mode is enabled. + */ +#define RECLAIM_DISTANCE 15 + +/* + * Returns a bitmask of CPUs on Node 'node'. + */ +#define cpumask_of_node(node) ((node) == -1 ? \ + cpu_all_mask : \ + &node_to_cpu_mask[node]) + +/* + * Determines the node for a given pci bus + */ +#define pcibus_to_node(bus) PCI_CONTROLLER(bus)->node + +void build_cpu_to_node_map(void); + +#endif /* CONFIG_NUMA */ + +#ifdef CONFIG_SMP +#define topology_physical_package_id(cpu) (cpu_data(cpu)->socket_id) +#define topology_core_id(cpu) (cpu_data(cpu)->core_id) +#define topology_core_cpumask(cpu) (&cpu_core_map[cpu]) +#define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) +#endif + +extern void arch_fix_phys_package_id(int num, u32 slot); + +#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ? \ + cpu_all_mask : \ + cpumask_of_node(pcibus_to_node(bus))) + +#include + +#endif /* _ASM_IA64_TOPOLOGY_H */ diff --git a/arch/ia64/include/asm/types.h b/arch/ia64/include/asm/types.h new file mode 100644 index 000000000000..5ddc7703de99 --- /dev/null +++ b/arch/ia64/include/asm/types.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file is never included by application software unless explicitly + * requested (e.g., via linux/types.h) in which case the application is + * Linux specific so (user-) name space pollution is not a major issue. + * However, for interoperability, libraries still need to be careful to + * avoid naming clashes. + * + * Based on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _ASM_IA64_TYPES_H +#define _ASM_IA64_TYPES_H + +#include +#include + +#ifdef __ASSEMBLY__ +#else +/* + * These aren't exported outside the kernel to avoid name space clashes + */ + +struct fnptr { + unsigned long ip; + unsigned long gp; +}; + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_TYPES_H */ diff --git a/arch/ia64/include/asm/uaccess.h b/arch/ia64/include/asm/uaccess.h new file mode 100644 index 000000000000..60adadeb3e9e --- /dev/null +++ b/arch/ia64/include/asm/uaccess.h @@ -0,0 +1,265 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_UACCESS_H +#define _ASM_IA64_UACCESS_H + +/* + * This file defines various macros to transfer memory areas across + * the user/kernel boundary. This needs to be done carefully because + * this code is executed in kernel mode and uses user-specified + * addresses. Thus, we need to be careful not to let the user to + * trick us into accessing kernel memory that would normally be + * inaccessible. This code is also fairly performance sensitive, + * so we want to spend as little time doing safety checks as + * possible. + * + * To make matters a bit more interesting, these macros sometimes also + * called from within the kernel itself, in which case the address + * validity check must be skipped. The get_fs() macro tells us what + * to do: if get_fs()==USER_DS, checking is performed, if + * get_fs()==KERNEL_DS, checking is bypassed. + * + * Note that even if the memory area specified by the user is in a + * valid address range, it is still possible that we'll get a page + * fault while accessing it. This is handled by filling out an + * exception handler fixup entry for each instruction that has the + * potential to fault. When such a fault occurs, the page fault + * handler checks to see whether the faulting instruction has a fixup + * associated and, if so, sets r8 to -EFAULT and clears r9 to 0 and + * then resumes execution at the continuation point. + * + * Based on . + * + * Copyright (C) 1998, 1999, 2001-2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include + +#include +#include +#include +#include + +/* + * When accessing user memory, we need to make sure the entire area really is + * in user-level space. We also need to make sure that the address doesn't + * point inside the virtually mapped linear page table. + */ +static inline int __access_ok(const void __user *p, unsigned long size) +{ + unsigned long limit = TASK_SIZE; + unsigned long addr = (unsigned long)p; + + return likely((size <= limit) && (addr <= (limit - size)) && + likely(REGION_OFFSET(addr) < RGN_MAP_LIMIT)); +} +#define __access_ok __access_ok +#include + +/* + * These are the main single-value transfer routines. They automatically + * use the right size if we just have the right pointer type. + * + * Careful to not + * (a) re-use the arguments for side effects (sizeof/typeof is ok) + * (b) require any knowledge of processes at this stage + */ +#define put_user(x, ptr) __put_user_check((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr))) +#define get_user(x, ptr) __get_user_check((x), (ptr), sizeof(*(ptr))) + +/* + * The "__xxx" versions do not do address space checking, useful when + * doing multiple accesses to the same area (the programmer has to do the + * checks by hand with "access_ok()") + */ +#define __put_user(x, ptr) __put_user_nocheck((__typeof__(*(ptr))) (x), (ptr), sizeof(*(ptr))) +#define __get_user(x, ptr) __get_user_nocheck((x), (ptr), sizeof(*(ptr))) + +#ifdef ASM_SUPPORTED + struct __large_struct { unsigned long buf[100]; }; +# define __m(x) (*(struct __large_struct __user *)(x)) + +/* We need to declare the __ex_table section before we can use it in .xdata. */ +asm (".section \"__ex_table\", \"a\"\n\t.previous"); + +# define __get_user_size(val, addr, n, err) \ +do { \ + register long __gu_r8 asm ("r8") = 0; \ + register long __gu_r9 asm ("r9"); \ + asm ("\n[1:]\tld"#n" %0=%2%P2\t// %0 and %1 get overwritten by exception handler\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.+4\n" \ + "[1:]" \ + : "=r"(__gu_r9), "=r"(__gu_r8) : "m"(__m(addr)), "1"(__gu_r8)); \ + (err) = __gu_r8; \ + (val) = __gu_r9; \ +} while (0) + +/* + * The "__put_user_size()" macro tells gcc it reads from memory instead of writing it. This + * is because they do not write to any memory gcc knows about, so there are no aliasing + * issues. + */ +# define __put_user_size(val, addr, n, err) \ +do { \ + register long __pu_r8 asm ("r8") = 0; \ + asm volatile ("\n[1:]\tst"#n" %1=%r2%P1\t// %0 gets overwritten by exception handler\n" \ + "\t.xdata4 \"__ex_table\", 1b-., 1f-.\n" \ + "[1:]" \ + : "=r"(__pu_r8) : "m"(__m(addr)), "rO"(val), "0"(__pu_r8)); \ + (err) = __pu_r8; \ +} while (0) + +#else /* !ASM_SUPPORTED */ +# define RELOC_TYPE 2 /* ip-rel */ +# define __get_user_size(val, addr, n, err) \ +do { \ + __ld_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE); \ + (err) = ia64_getreg(_IA64_REG_R8); \ + (val) = ia64_getreg(_IA64_REG_R9); \ +} while (0) +# define __put_user_size(val, addr, n, err) \ +do { \ + __st_user("__ex_table", (unsigned long) addr, n, RELOC_TYPE, \ + (__force unsigned long) (val)); \ + (err) = ia64_getreg(_IA64_REG_R8); \ +} while (0) +#endif /* !ASM_SUPPORTED */ + +extern void __get_user_unknown (void); + +/* + * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which + * could clobber r8 and r9 (among others). Thus, be careful not to evaluate it while + * using r8/r9. + */ +#define __do_get_user(check, x, ptr, size) \ +({ \ + const __typeof__(*(ptr)) __user *__gu_ptr = (ptr); \ + __typeof__ (size) __gu_size = (size); \ + long __gu_err = -EFAULT; \ + unsigned long __gu_val = 0; \ + if (!check || __access_ok(__gu_ptr, size)) \ + switch (__gu_size) { \ + case 1: __get_user_size(__gu_val, __gu_ptr, 1, __gu_err); break; \ + case 2: __get_user_size(__gu_val, __gu_ptr, 2, __gu_err); break; \ + case 4: __get_user_size(__gu_val, __gu_ptr, 4, __gu_err); break; \ + case 8: __get_user_size(__gu_val, __gu_ptr, 8, __gu_err); break; \ + default: __get_user_unknown(); break; \ + } \ + (x) = (__force __typeof__(*(__gu_ptr))) __gu_val; \ + __gu_err; \ +}) + +#define __get_user_nocheck(x, ptr, size) __do_get_user(0, x, ptr, size) +#define __get_user_check(x, ptr, size) __do_get_user(1, x, ptr, size) + +extern void __put_user_unknown (void); + +/* + * Evaluating arguments X, PTR, SIZE, and SEGMENT may involve subroutine-calls, which + * could clobber r8 (among others). Thus, be careful not to evaluate them while using r8. + */ +#define __do_put_user(check, x, ptr, size) \ +({ \ + __typeof__ (x) __pu_x = (x); \ + __typeof__ (*(ptr)) __user *__pu_ptr = (ptr); \ + __typeof__ (size) __pu_size = (size); \ + long __pu_err = -EFAULT; \ + \ + if (!check || __access_ok(__pu_ptr, __pu_size)) \ + switch (__pu_size) { \ + case 1: __put_user_size(__pu_x, __pu_ptr, 1, __pu_err); break; \ + case 2: __put_user_size(__pu_x, __pu_ptr, 2, __pu_err); break; \ + case 4: __put_user_size(__pu_x, __pu_ptr, 4, __pu_err); break; \ + case 8: __put_user_size(__pu_x, __pu_ptr, 8, __pu_err); break; \ + default: __put_user_unknown(); break; \ + } \ + __pu_err; \ +}) + +#define __put_user_nocheck(x, ptr, size) __do_put_user(0, x, ptr, size) +#define __put_user_check(x, ptr, size) __do_put_user(1, x, ptr, size) + +/* + * Complex access routines + */ +extern unsigned long __must_check __copy_user (void __user *to, const void __user *from, + unsigned long count); + +static inline unsigned long +raw_copy_to_user(void __user *to, const void *from, unsigned long count) +{ + return __copy_user(to, (__force void __user *) from, count); +} + +static inline unsigned long +raw_copy_from_user(void *to, const void __user *from, unsigned long count) +{ + return __copy_user((__force void __user *) to, from, count); +} + +#define INLINE_COPY_FROM_USER +#define INLINE_COPY_TO_USER + +extern unsigned long __do_clear_user (void __user *, unsigned long); + +#define __clear_user(to, n) __do_clear_user(to, n) + +#define clear_user(to, n) \ +({ \ + unsigned long __cu_len = (n); \ + if (__access_ok(to, __cu_len)) \ + __cu_len = __do_clear_user(to, __cu_len); \ + __cu_len; \ +}) + + +/* + * Returns: -EFAULT if exception before terminator, N if the entire buffer filled, else + * strlen. + */ +extern long __must_check __strncpy_from_user (char *to, const char __user *from, long to_len); + +#define strncpy_from_user(to, from, n) \ +({ \ + const char __user * __sfu_from = (from); \ + long __sfu_ret = -EFAULT; \ + if (__access_ok(__sfu_from, 0)) \ + __sfu_ret = __strncpy_from_user((to), __sfu_from, (n)); \ + __sfu_ret; \ +}) + +/* + * Returns: 0 if exception before NUL or reaching the supplied limit + * (N), a value greater than N if the limit would be exceeded, else + * strlen. + */ +extern unsigned long __strnlen_user (const char __user *, long); + +#define strnlen_user(str, len) \ +({ \ + const char __user *__su_str = (str); \ + unsigned long __su_ret = 0; \ + if (__access_ok(__su_str, 0)) \ + __su_ret = __strnlen_user(__su_str, len); \ + __su_ret; \ +}) + +#define ARCH_HAS_TRANSLATE_MEM_PTR 1 +static __inline__ void * +xlate_dev_mem_ptr(phys_addr_t p) +{ + struct page *page; + void *ptr; + + page = pfn_to_page(p >> PAGE_SHIFT); + if (PageUncached(page)) + ptr = (void *)p + __IA64_UNCACHED_OFFSET; + else + ptr = __va(p); + + return ptr; +} + +#endif /* _ASM_IA64_UACCESS_H */ diff --git a/arch/ia64/include/asm/uncached.h b/arch/ia64/include/asm/uncached.h new file mode 100644 index 000000000000..98f447fc77b7 --- /dev/null +++ b/arch/ia64/include/asm/uncached.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved. + * + * Prototypes for the uncached page allocator + */ + +extern unsigned long uncached_alloc_page(int starting_nid, int n_pages); +extern void uncached_free_page(unsigned long uc_addr, int n_pages); diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h new file mode 100644 index 000000000000..9ba6110b10b9 --- /dev/null +++ b/arch/ia64/include/asm/unistd.h @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IA-64 Linux syscall numbers and inline-functions. + * + * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _ASM_IA64_UNISTD_H +#define _ASM_IA64_UNISTD_H + +#include + +#define NR_syscalls __NR_syscalls /* length of syscall table */ + +#define __ARCH_WANT_NEW_STAT +#define __ARCH_WANT_SYS_UTIME + +#if !defined(__ASSEMBLY__) && !defined(ASSEMBLER) + +#include +#include +#include + +extern long __ia64_syscall (long a0, long a1, long a2, long a3, long a4, long nr); + +asmlinkage unsigned long sys_mmap( + unsigned long addr, unsigned long len, + int prot, int flags, + int fd, long off); +asmlinkage unsigned long sys_mmap2( + unsigned long addr, unsigned long len, + int prot, int flags, + int fd, long pgoff); +struct pt_regs; +asmlinkage long sys_ia64_pipe(void); + +#endif /* !__ASSEMBLY__ */ +#endif /* _ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/include/asm/unwind.h b/arch/ia64/include/asm/unwind.h new file mode 100644 index 000000000000..c5bd4b3e3a36 --- /dev/null +++ b/arch/ia64/include/asm/unwind.h @@ -0,0 +1,234 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_UNWIND_H +#define _ASM_IA64_UNWIND_H + +/* + * Copyright (C) 1999-2000, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * A simple API for unwinding kernel stacks. This is used for + * debugging and error reporting purposes. The kernel doesn't need + * full-blown stack unwinding with all the bells and whitles, so there + * is not much point in implementing the full IA-64 unwind API (though + * it would of course be possible to implement the kernel API on top + * of it). + */ + +struct task_struct; /* forward declaration */ +struct switch_stack; /* forward declaration */ + +enum unw_application_register { + UNW_AR_BSP, + UNW_AR_BSPSTORE, + UNW_AR_PFS, + UNW_AR_RNAT, + UNW_AR_UNAT, + UNW_AR_LC, + UNW_AR_EC, + UNW_AR_FPSR, + UNW_AR_RSC, + UNW_AR_CCV, + UNW_AR_CSD, + UNW_AR_SSD +}; + +/* + * The following declarations are private to the unwind + * implementation: + */ + +struct unw_stack { + unsigned long limit; + unsigned long top; +}; + +#define UNW_FLAG_INTERRUPT_FRAME (1UL << 0) + +/* + * No user of this module should every access this structure directly + * as it is subject to change. It is declared here solely so we can + * use automatic variables. + */ +struct unw_frame_info { + struct unw_stack regstk; + struct unw_stack memstk; + unsigned int flags; + short hint; + short prev_script; + + /* current frame info: */ + unsigned long bsp; /* backing store pointer value */ + unsigned long sp; /* stack pointer value */ + unsigned long psp; /* previous sp value */ + unsigned long ip; /* instruction pointer value */ + unsigned long pr; /* current predicate values */ + unsigned long *cfm_loc; /* cfm save location (or NULL) */ + unsigned long pt; /* struct pt_regs location */ + + struct task_struct *task; + struct switch_stack *sw; + + /* preserved state: */ + unsigned long *bsp_loc; /* previous bsp save location */ + unsigned long *bspstore_loc; + unsigned long *pfs_loc; + unsigned long *rnat_loc; + unsigned long *rp_loc; + unsigned long *pri_unat_loc; + unsigned long *unat_loc; + unsigned long *pr_loc; + unsigned long *lc_loc; + unsigned long *fpsr_loc; + struct unw_ireg { + unsigned long *loc; + struct unw_ireg_nat { + unsigned long type : 3; /* enum unw_nat_type */ + signed long off : 61; /* NaT word is at loc+nat.off */ + } nat; + } r4, r5, r6, r7; + unsigned long *b1_loc, *b2_loc, *b3_loc, *b4_loc, *b5_loc; + struct ia64_fpreg *f2_loc, *f3_loc, *f4_loc, *f5_loc, *fr_loc[16]; +}; + +/* + * The official API follows below: + */ + +struct unw_table_entry { + u64 start_offset; + u64 end_offset; + u64 info_offset; +}; + +/* + * Initialize unwind support. + */ +extern void unw_init (void); + +extern void *unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, + const void *table_start, const void *table_end); + +extern void unw_remove_unwind_table (void *handle); + +/* + * Prepare to unwind blocked task t. + */ +extern void unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t); + +extern void unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, + struct switch_stack *sw); + +/* + * Prepare to unwind the currently running thread. + */ +extern void unw_init_running (void (*callback)(struct unw_frame_info *info, void *arg), void *arg); + +/* + * Unwind to previous to frame. Returns 0 if successful, negative + * number in case of an error. + */ +extern int unw_unwind (struct unw_frame_info *info); + +/* + * Unwind until the return pointer is in user-land (or until an error + * occurs). Returns 0 if successful, negative number in case of + * error. + */ +extern int unw_unwind_to_user (struct unw_frame_info *info); + +#define unw_is_intr_frame(info) (((info)->flags & UNW_FLAG_INTERRUPT_FRAME) != 0) + +static inline int +unw_get_ip (struct unw_frame_info *info, unsigned long *valp) +{ + *valp = (info)->ip; + return 0; +} + +static inline int +unw_get_sp (struct unw_frame_info *info, unsigned long *valp) +{ + *valp = (info)->sp; + return 0; +} + +static inline int +unw_get_psp (struct unw_frame_info *info, unsigned long *valp) +{ + *valp = (info)->psp; + return 0; +} + +static inline int +unw_get_bsp (struct unw_frame_info *info, unsigned long *valp) +{ + *valp = (info)->bsp; + return 0; +} + +static inline int +unw_get_cfm (struct unw_frame_info *info, unsigned long *valp) +{ + *valp = *(info)->cfm_loc; + return 0; +} + +static inline int +unw_set_cfm (struct unw_frame_info *info, unsigned long val) +{ + *(info)->cfm_loc = val; + return 0; +} + +static inline int +unw_get_rp (struct unw_frame_info *info, unsigned long *val) +{ + if (!info->rp_loc) + return -1; + *val = *info->rp_loc; + return 0; +} + +extern int unw_access_gr (struct unw_frame_info *, int, unsigned long *, char *, int); +extern int unw_access_br (struct unw_frame_info *, int, unsigned long *, int); +extern int unw_access_fr (struct unw_frame_info *, int, struct ia64_fpreg *, int); +extern int unw_access_ar (struct unw_frame_info *, int, unsigned long *, int); +extern int unw_access_pr (struct unw_frame_info *, unsigned long *, int); + +static inline int +unw_set_gr (struct unw_frame_info *i, int n, unsigned long v, char nat) +{ + return unw_access_gr(i, n, &v, &nat, 1); +} + +static inline int +unw_set_br (struct unw_frame_info *i, int n, unsigned long v) +{ + return unw_access_br(i, n, &v, 1); +} + +static inline int +unw_set_fr (struct unw_frame_info *i, int n, struct ia64_fpreg v) +{ + return unw_access_fr(i, n, &v, 1); +} + +static inline int +unw_set_ar (struct unw_frame_info *i, int n, unsigned long v) +{ + return unw_access_ar(i, n, &v, 1); +} + +static inline int +unw_set_pr (struct unw_frame_info *i, unsigned long v) +{ + return unw_access_pr(i, &v, 1); +} + +#define unw_get_gr(i,n,v,nat) unw_access_gr(i,n,v,nat,0) +#define unw_get_br(i,n,v) unw_access_br(i,n,v,0) +#define unw_get_fr(i,n,v) unw_access_fr(i,n,v,0) +#define unw_get_ar(i,n,v) unw_access_ar(i,n,v,0) +#define unw_get_pr(i,v) unw_access_pr(i,v,0) + +#endif /* _ASM_UNWIND_H */ diff --git a/arch/ia64/include/asm/user.h b/arch/ia64/include/asm/user.h new file mode 100644 index 000000000000..ec03d3ab8715 --- /dev/null +++ b/arch/ia64/include/asm/user.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_USER_H +#define _ASM_IA64_USER_H + +/* + * Core file format: The core file is written in such a way that gdb + * can understand it and provide useful information to the user (under + * linux we use the `trad-core' bfd). The file contents are as + * follows: + * + * upage: 1 page consisting of a user struct that tells gdb + * what is present in the file. Directly after this is a + * copy of the task_struct, which is currently not used by gdb, + * but it may come in handy at some point. All of the registers + * are stored as part of the upage. The upage should always be + * only one page long. + * data: The data segment follows next. We use current->end_text to + * current->brk to pick up all of the user variables, plus any memory + * that may have been sbrk'ed. No attempt is made to determine if a + * page is demand-zero or if a page is totally unused, we just cover + * the entire range. All of the addresses are rounded in such a way + * that an integral number of pages is written. + * stack: We need the stack information in order to get a meaningful + * backtrace. We need to write the data from usp to + * current->start_stack, so we round each of these in order to be able + * to write an integer number of pages. + * + * Modified 1998, 1999, 2001 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +#include +#include + +#include + +#define EF_SIZE 3072 /* XXX fix me */ + +struct user { + unsigned long regs[EF_SIZE/8+32]; /* integer and fp regs */ + size_t u_tsize; /* text size (pages) */ + size_t u_dsize; /* data size (pages) */ + size_t u_ssize; /* stack size (pages) */ + unsigned long start_code; /* text starting address */ + unsigned long start_data; /* data starting address */ + unsigned long start_stack; /* stack starting address */ + long int signal; /* signal causing core dump */ + unsigned long u_ar0; /* help gdb find registers */ + unsigned long magic; /* identifies a core file */ + char u_comm[32]; /* user command name */ +}; + +#endif /* _ASM_IA64_USER_H */ diff --git a/arch/ia64/include/asm/ustack.h b/arch/ia64/include/asm/ustack.h new file mode 100644 index 000000000000..112d40a0fec2 --- /dev/null +++ b/arch/ia64/include/asm/ustack.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_USTACK_H +#define _ASM_IA64_USTACK_H + +#include +#include + +/* The absolute hard limit for stack size is 1/2 of the mappable space in the region */ +#define MAX_USER_STACK_SIZE (RGN_MAP_LIMIT/2) +#define STACK_TOP (0x6000000000000000UL + RGN_MAP_LIMIT) +#define STACK_TOP_MAX STACK_TOP +#endif /* _ASM_IA64_USTACK_H */ diff --git a/arch/ia64/include/asm/uv/uv.h b/arch/ia64/include/asm/uv/uv.h new file mode 100644 index 000000000000..48d4526bf4cd --- /dev/null +++ b/arch/ia64/include/asm/uv/uv.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_UV_UV_H +#define _ASM_IA64_UV_UV_H + +#ifdef CONFIG_IA64_SGI_UV +extern bool ia64_is_uv; + +static inline int is_uv_system(void) +{ + return ia64_is_uv; +} + +void __init uv_probe_system_type(void); +void __init uv_setup(char **cmdline_p); +#else /* CONFIG_IA64_SGI_UV */ +static inline int is_uv_system(void) +{ + return false; +} + +static inline void __init uv_probe_system_type(void) +{ +} + +static inline void __init uv_setup(char **cmdline_p) +{ +} +#endif /* CONFIG_IA64_SGI_UV */ + +#endif /* _ASM_IA64_UV_UV_H */ diff --git a/arch/ia64/include/asm/uv/uv_hub.h b/arch/ia64/include/asm/uv/uv_hub.h new file mode 100644 index 000000000000..809ddb6896db --- /dev/null +++ b/arch/ia64/include/asm/uv/uv_hub.h @@ -0,0 +1,315 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV architectural definitions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef __ASM_IA64_UV_HUB_H__ +#define __ASM_IA64_UV_HUB_H__ + +#include +#include +#include +#include + + +/* + * Addressing Terminology + * + * M - The low M bits of a physical address represent the offset + * into the blade local memory. RAM memory on a blade is physically + * contiguous (although various IO spaces may punch holes in + * it).. + * + * N - Number of bits in the node portion of a socket physical + * address. + * + * NASID - network ID of a router, Mbrick or Cbrick. Nasid values of + * routers always have low bit of 1, C/MBricks have low bit + * equal to 0. Most addressing macros that target UV hub chips + * right shift the NASID by 1 to exclude the always-zero bit. + * NASIDs contain up to 15 bits. + * + * GNODE - NASID right shifted by 1 bit. Most mmrs contain gnodes instead + * of nasids. + * + * PNODE - the low N bits of the GNODE. The PNODE is the most useful variant + * of the nasid for socket usage. + * + * + * NumaLink Global Physical Address Format: + * +--------------------------------+---------------------+ + * |00..000| GNODE | NodeOffset | + * +--------------------------------+---------------------+ + * |<-------53 - M bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * + * + * Memory/UV-HUB Processor Socket Address Format: + * +----------------+---------------+---------------------+ + * |00..000000000000| PNODE | NodeOffset | + * +----------------+---------------+---------------------+ + * <--- N bits --->|<--------M bits -----> + * + * M - number of node offset bits (35 .. 40) + * N - number of PNODE bits (0 .. 10) + * + * Note: M + N cannot currently exceed 44 (x86_64) or 46 (IA64). + * The actual values are configuration dependent and are set at + * boot time. M & N values are set by the hardware/BIOS at boot. + */ + + +/* + * Maximum number of bricks in all partitions and in all coherency domains. + * This is the total number of bricks accessible in the numalink fabric. It + * includes all C & M bricks. Routers are NOT included. + * + * This value is also the value of the maximum number of non-router NASIDs + * in the numalink fabric. + * + * NOTE: a brick may contain 1 or 2 OS nodes. Don't get these confused. + */ +#define UV_MAX_NUMALINK_BLADES 16384 + +/* + * Maximum number of C/Mbricks within a software SSI (hardware may support + * more). + */ +#define UV_MAX_SSI_BLADES 1 + +/* + * The largest possible NASID of a C or M brick (+ 2) + */ +#define UV_MAX_NASID_VALUE (UV_MAX_NUMALINK_NODES * 2) + +/* + * The following defines attributes of the HUB chip. These attributes are + * frequently referenced and are kept in the per-cpu data areas of each cpu. + * They are kept together in a struct to minimize cache misses. + */ +struct uv_hub_info_s { + unsigned long global_mmr_base; + unsigned long gpa_mask; + unsigned long gnode_upper; + unsigned long lowmem_remap_top; + unsigned long lowmem_remap_base; + unsigned short pnode; + unsigned short pnode_mask; + unsigned short coherency_domain_number; + unsigned short numa_blade_id; + unsigned char blade_processor_id; + unsigned char m_val; + unsigned char n_val; +}; +DECLARE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); +#define uv_hub_info this_cpu_ptr(&__uv_hub_info) +#define uv_cpu_hub_info(cpu) (&per_cpu(__uv_hub_info, cpu)) + +/* + * Local & Global MMR space macros. + * Note: macros are intended to be used ONLY by inline functions + * in this file - not by other kernel code. + * n - NASID (full 15-bit global nasid) + * g - GNODE (full 15-bit global nasid, right shifted 1) + * p - PNODE (local part of nsids, right shifted 1) + */ +#define UV_NASID_TO_PNODE(n) (((n) >> 1) & uv_hub_info->pnode_mask) +#define UV_PNODE_TO_NASID(p) (((p) << 1) | uv_hub_info->gnode_upper) + +#define UV_LOCAL_MMR_BASE 0xf4000000UL +#define UV_GLOBAL_MMR32_BASE 0xf8000000UL +#define UV_GLOBAL_MMR64_BASE (uv_hub_info->global_mmr_base) + +#define UV_GLOBAL_MMR32_PNODE_SHIFT 15 +#define UV_GLOBAL_MMR64_PNODE_SHIFT 26 + +#define UV_GLOBAL_MMR32_PNODE_BITS(p) ((p) << (UV_GLOBAL_MMR32_PNODE_SHIFT)) + +#define UV_GLOBAL_MMR64_PNODE_BITS(p) \ + ((unsigned long)(p) << UV_GLOBAL_MMR64_PNODE_SHIFT) + +/* + * Macros for converting between kernel virtual addresses, socket local physical + * addresses, and UV global physical addresses. + * Note: use the standard __pa() & __va() macros for converting + * between socket virtual and socket physical addresses. + */ + +/* socket phys RAM --> UV global physical address */ +static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) +{ + if (paddr < uv_hub_info->lowmem_remap_top) + paddr += uv_hub_info->lowmem_remap_base; + return paddr | uv_hub_info->gnode_upper; +} + + +/* socket virtual --> UV global physical address */ +static inline unsigned long uv_gpa(void *v) +{ + return __pa(v) | uv_hub_info->gnode_upper; +} + +/* socket virtual --> UV global physical address */ +static inline void *uv_vgpa(void *v) +{ + return (void *)uv_gpa(v); +} + +/* UV global physical address --> socket virtual */ +static inline void *uv_va(unsigned long gpa) +{ + return __va(gpa & uv_hub_info->gpa_mask); +} + +/* pnode, offset --> socket virtual */ +static inline void *uv_pnode_offset_to_vaddr(int pnode, unsigned long offset) +{ + return __va(((unsigned long)pnode << uv_hub_info->m_val) | offset); +} + + +/* + * Access global MMRs using the low memory MMR32 space. This region supports + * faster MMR access but not all MMRs are accessible in this space. + */ +static inline unsigned long *uv_global_mmr32_address(int pnode, + unsigned long offset) +{ + return __va(UV_GLOBAL_MMR32_BASE | + UV_GLOBAL_MMR32_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr32(int pnode, unsigned long offset, + unsigned long val) +{ + *uv_global_mmr32_address(pnode, offset) = val; +} + +static inline unsigned long uv_read_global_mmr32(int pnode, + unsigned long offset) +{ + return *uv_global_mmr32_address(pnode, offset); +} + +/* + * Access Global MMR space using the MMR space located at the top of physical + * memory. + */ +static inline unsigned long *uv_global_mmr64_address(int pnode, + unsigned long offset) +{ + return __va(UV_GLOBAL_MMR64_BASE | + UV_GLOBAL_MMR64_PNODE_BITS(pnode) | offset); +} + +static inline void uv_write_global_mmr64(int pnode, unsigned long offset, + unsigned long val) +{ + *uv_global_mmr64_address(pnode, offset) = val; +} + +static inline unsigned long uv_read_global_mmr64(int pnode, + unsigned long offset) +{ + return *uv_global_mmr64_address(pnode, offset); +} + +/* + * Access hub local MMRs. Faster than using global space but only local MMRs + * are accessible. + */ +static inline unsigned long *uv_local_mmr_address(unsigned long offset) +{ + return __va(UV_LOCAL_MMR_BASE | offset); +} + +static inline unsigned long uv_read_local_mmr(unsigned long offset) +{ + return *uv_local_mmr_address(offset); +} + +static inline void uv_write_local_mmr(unsigned long offset, unsigned long val) +{ + *uv_local_mmr_address(offset) = val; +} + +/* + * Structures and definitions for converting between cpu, node, pnode, and blade + * numbers. + */ + +/* Blade-local cpu number of current cpu. Numbered 0 .. <# cpus on the blade> */ +static inline int uv_blade_processor_id(void) +{ + return smp_processor_id(); +} + +/* Blade number of current cpu. Numnbered 0 .. <#blades -1> */ +static inline int uv_numa_blade_id(void) +{ + return 0; +} + +/* Convert a cpu number to the UV blade number */ +static inline int uv_cpu_to_blade_id(int cpu) +{ + return 0; +} + +/* Convert linux node number to the UV blade number */ +static inline int uv_node_to_blade_id(int nid) +{ + return 0; +} + +/* Convert a blade id to the PNODE of the blade */ +static inline int uv_blade_to_pnode(int bid) +{ + return 0; +} + +/* Determine the number of possible cpus on a blade */ +static inline int uv_blade_nr_possible_cpus(int bid) +{ + return num_possible_cpus(); +} + +/* Determine the number of online cpus on a blade */ +static inline int uv_blade_nr_online_cpus(int bid) +{ + return num_online_cpus(); +} + +/* Convert a cpu id to the PNODE of the blade containing the cpu */ +static inline int uv_cpu_to_pnode(int cpu) +{ + return 0; +} + +/* Convert a linux node number to the PNODE of the blade */ +static inline int uv_node_to_pnode(int nid) +{ + return 0; +} + +/* Maximum possible number of blades */ +static inline int uv_num_possible_blades(void) +{ + return 1; +} + +static inline void uv_hub_send_ipi(int pnode, int apicid, int vector) +{ + /* not currently needed on ia64 */ +} + + +#endif /* __ASM_IA64_UV_HUB__ */ + diff --git a/arch/ia64/include/asm/uv/uv_mmrs.h b/arch/ia64/include/asm/uv/uv_mmrs.h new file mode 100644 index 000000000000..fe0b8f05e1a8 --- /dev/null +++ b/arch/ia64/include/asm/uv/uv_mmrs.h @@ -0,0 +1,825 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV MMR definitions + * + * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. + */ + +#ifndef _ASM_IA64_UV_UV_MMRS_H +#define _ASM_IA64_UV_UV_MMRS_H + +#define UV_MMR_ENABLE (1UL << 63) + +/* ========================================================================= */ +/* UVH_BAU_DATA_CONFIG */ +/* ========================================================================= */ +#define UVH_BAU_DATA_CONFIG 0x61680UL +#define UVH_BAU_DATA_CONFIG_32 0x0438 + +#define UVH_BAU_DATA_CONFIG_VECTOR_SHFT 0 +#define UVH_BAU_DATA_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_BAU_DATA_CONFIG_DM_SHFT 8 +#define UVH_BAU_DATA_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_BAU_DATA_CONFIG_DESTMODE_SHFT 11 +#define UVH_BAU_DATA_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_BAU_DATA_CONFIG_STATUS_SHFT 12 +#define UVH_BAU_DATA_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_BAU_DATA_CONFIG_P_SHFT 13 +#define UVH_BAU_DATA_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_BAU_DATA_CONFIG_T_SHFT 15 +#define UVH_BAU_DATA_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_BAU_DATA_CONFIG_M_SHFT 16 +#define UVH_BAU_DATA_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_BAU_DATA_CONFIG_APIC_ID_SHFT 32 +#define UVH_BAU_DATA_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_bau_data_config_u { + unsigned long v; + struct uvh_bau_data_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0 */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0 0x70000UL +#define UVH_EVENT_OCCURRED0_32 0x005e8 + +#define UVH_EVENT_OCCURRED0_LB_HCERR_SHFT 0 +#define UVH_EVENT_OCCURRED0_LB_HCERR_MASK 0x0000000000000001UL +#define UVH_EVENT_OCCURRED0_GR0_HCERR_SHFT 1 +#define UVH_EVENT_OCCURRED0_GR0_HCERR_MASK 0x0000000000000002UL +#define UVH_EVENT_OCCURRED0_GR1_HCERR_SHFT 2 +#define UVH_EVENT_OCCURRED0_GR1_HCERR_MASK 0x0000000000000004UL +#define UVH_EVENT_OCCURRED0_LH_HCERR_SHFT 3 +#define UVH_EVENT_OCCURRED0_LH_HCERR_MASK 0x0000000000000008UL +#define UVH_EVENT_OCCURRED0_RH_HCERR_SHFT 4 +#define UVH_EVENT_OCCURRED0_RH_HCERR_MASK 0x0000000000000010UL +#define UVH_EVENT_OCCURRED0_XN_HCERR_SHFT 5 +#define UVH_EVENT_OCCURRED0_XN_HCERR_MASK 0x0000000000000020UL +#define UVH_EVENT_OCCURRED0_SI_HCERR_SHFT 6 +#define UVH_EVENT_OCCURRED0_SI_HCERR_MASK 0x0000000000000040UL +#define UVH_EVENT_OCCURRED0_LB_AOERR0_SHFT 7 +#define UVH_EVENT_OCCURRED0_LB_AOERR0_MASK 0x0000000000000080UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_SHFT 8 +#define UVH_EVENT_OCCURRED0_GR0_AOERR0_MASK 0x0000000000000100UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_SHFT 9 +#define UVH_EVENT_OCCURRED0_GR1_AOERR0_MASK 0x0000000000000200UL +#define UVH_EVENT_OCCURRED0_LH_AOERR0_SHFT 10 +#define UVH_EVENT_OCCURRED0_LH_AOERR0_MASK 0x0000000000000400UL +#define UVH_EVENT_OCCURRED0_RH_AOERR0_SHFT 11 +#define UVH_EVENT_OCCURRED0_RH_AOERR0_MASK 0x0000000000000800UL +#define UVH_EVENT_OCCURRED0_XN_AOERR0_SHFT 12 +#define UVH_EVENT_OCCURRED0_XN_AOERR0_MASK 0x0000000000001000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR0_SHFT 13 +#define UVH_EVENT_OCCURRED0_SI_AOERR0_MASK 0x0000000000002000UL +#define UVH_EVENT_OCCURRED0_LB_AOERR1_SHFT 14 +#define UVH_EVENT_OCCURRED0_LB_AOERR1_MASK 0x0000000000004000UL +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_SHFT 15 +#define UVH_EVENT_OCCURRED0_GR0_AOERR1_MASK 0x0000000000008000UL +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_SHFT 16 +#define UVH_EVENT_OCCURRED0_GR1_AOERR1_MASK 0x0000000000010000UL +#define UVH_EVENT_OCCURRED0_LH_AOERR1_SHFT 17 +#define UVH_EVENT_OCCURRED0_LH_AOERR1_MASK 0x0000000000020000UL +#define UVH_EVENT_OCCURRED0_RH_AOERR1_SHFT 18 +#define UVH_EVENT_OCCURRED0_RH_AOERR1_MASK 0x0000000000040000UL +#define UVH_EVENT_OCCURRED0_XN_AOERR1_SHFT 19 +#define UVH_EVENT_OCCURRED0_XN_AOERR1_MASK 0x0000000000080000UL +#define UVH_EVENT_OCCURRED0_SI_AOERR1_SHFT 20 +#define UVH_EVENT_OCCURRED0_SI_AOERR1_MASK 0x0000000000100000UL +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_SHFT 21 +#define UVH_EVENT_OCCURRED0_RH_VPI_INT_MASK 0x0000000000200000UL +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_SHFT 22 +#define UVH_EVENT_OCCURRED0_SYSTEM_SHUTDOWN_INT_MASK 0x0000000000400000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_SHFT 23 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_0_MASK 0x0000000000800000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_SHFT 24 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_1_MASK 0x0000000001000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_SHFT 25 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_2_MASK 0x0000000002000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_SHFT 26 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_3_MASK 0x0000000004000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_SHFT 27 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_4_MASK 0x0000000008000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_SHFT 28 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_5_MASK 0x0000000010000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_SHFT 29 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_6_MASK 0x0000000020000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_SHFT 30 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_7_MASK 0x0000000040000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_SHFT 31 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_8_MASK 0x0000000080000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_SHFT 32 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_9_MASK 0x0000000100000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_SHFT 33 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_10_MASK 0x0000000200000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_SHFT 34 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_11_MASK 0x0000000400000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_SHFT 35 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_12_MASK 0x0000000800000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_SHFT 36 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_13_MASK 0x0000001000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_SHFT 37 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_14_MASK 0x0000002000000000UL +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_SHFT 38 +#define UVH_EVENT_OCCURRED0_LB_IRQ_INT_15_MASK 0x0000004000000000UL +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_SHFT 39 +#define UVH_EVENT_OCCURRED0_L1_NMI_INT_MASK 0x0000008000000000UL +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_SHFT 40 +#define UVH_EVENT_OCCURRED0_STOP_CLOCK_MASK 0x0000010000000000UL +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_SHFT 41 +#define UVH_EVENT_OCCURRED0_ASIC_TO_L1_MASK 0x0000020000000000UL +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_SHFT 42 +#define UVH_EVENT_OCCURRED0_L1_TO_ASIC_MASK 0x0000040000000000UL +#define UVH_EVENT_OCCURRED0_LTC_INT_SHFT 43 +#define UVH_EVENT_OCCURRED0_LTC_INT_MASK 0x0000080000000000UL +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_SHFT 44 +#define UVH_EVENT_OCCURRED0_LA_SEQ_TRIGGER_MASK 0x0000100000000000UL +#define UVH_EVENT_OCCURRED0_IPI_INT_SHFT 45 +#define UVH_EVENT_OCCURRED0_IPI_INT_MASK 0x0000200000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_SHFT 46 +#define UVH_EVENT_OCCURRED0_EXTIO_INT0_MASK 0x0000400000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_SHFT 47 +#define UVH_EVENT_OCCURRED0_EXTIO_INT1_MASK 0x0000800000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_SHFT 48 +#define UVH_EVENT_OCCURRED0_EXTIO_INT2_MASK 0x0001000000000000UL +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_SHFT 49 +#define UVH_EVENT_OCCURRED0_EXTIO_INT3_MASK 0x0002000000000000UL +#define UVH_EVENT_OCCURRED0_PROFILE_INT_SHFT 50 +#define UVH_EVENT_OCCURRED0_PROFILE_INT_MASK 0x0004000000000000UL +#define UVH_EVENT_OCCURRED0_RTC0_SHFT 51 +#define UVH_EVENT_OCCURRED0_RTC0_MASK 0x0008000000000000UL +#define UVH_EVENT_OCCURRED0_RTC1_SHFT 52 +#define UVH_EVENT_OCCURRED0_RTC1_MASK 0x0010000000000000UL +#define UVH_EVENT_OCCURRED0_RTC2_SHFT 53 +#define UVH_EVENT_OCCURRED0_RTC2_MASK 0x0020000000000000UL +#define UVH_EVENT_OCCURRED0_RTC3_SHFT 54 +#define UVH_EVENT_OCCURRED0_RTC3_MASK 0x0040000000000000UL +#define UVH_EVENT_OCCURRED0_BAU_DATA_SHFT 55 +#define UVH_EVENT_OCCURRED0_BAU_DATA_MASK 0x0080000000000000UL +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_SHFT 56 +#define UVH_EVENT_OCCURRED0_POWER_MANAGEMENT_REQ_MASK 0x0100000000000000UL +union uvh_event_occurred0_u { + unsigned long v; + struct uvh_event_occurred0_s { + unsigned long lb_hcerr : 1; /* RW, W1C */ + unsigned long gr0_hcerr : 1; /* RW, W1C */ + unsigned long gr1_hcerr : 1; /* RW, W1C */ + unsigned long lh_hcerr : 1; /* RW, W1C */ + unsigned long rh_hcerr : 1; /* RW, W1C */ + unsigned long xn_hcerr : 1; /* RW, W1C */ + unsigned long si_hcerr : 1; /* RW, W1C */ + unsigned long lb_aoerr0 : 1; /* RW, W1C */ + unsigned long gr0_aoerr0 : 1; /* RW, W1C */ + unsigned long gr1_aoerr0 : 1; /* RW, W1C */ + unsigned long lh_aoerr0 : 1; /* RW, W1C */ + unsigned long rh_aoerr0 : 1; /* RW, W1C */ + unsigned long xn_aoerr0 : 1; /* RW, W1C */ + unsigned long si_aoerr0 : 1; /* RW, W1C */ + unsigned long lb_aoerr1 : 1; /* RW, W1C */ + unsigned long gr0_aoerr1 : 1; /* RW, W1C */ + unsigned long gr1_aoerr1 : 1; /* RW, W1C */ + unsigned long lh_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_aoerr1 : 1; /* RW, W1C */ + unsigned long xn_aoerr1 : 1; /* RW, W1C */ + unsigned long si_aoerr1 : 1; /* RW, W1C */ + unsigned long rh_vpi_int : 1; /* RW, W1C */ + unsigned long system_shutdown_int : 1; /* RW, W1C */ + unsigned long lb_irq_int_0 : 1; /* RW, W1C */ + unsigned long lb_irq_int_1 : 1; /* RW, W1C */ + unsigned long lb_irq_int_2 : 1; /* RW, W1C */ + unsigned long lb_irq_int_3 : 1; /* RW, W1C */ + unsigned long lb_irq_int_4 : 1; /* RW, W1C */ + unsigned long lb_irq_int_5 : 1; /* RW, W1C */ + unsigned long lb_irq_int_6 : 1; /* RW, W1C */ + unsigned long lb_irq_int_7 : 1; /* RW, W1C */ + unsigned long lb_irq_int_8 : 1; /* RW, W1C */ + unsigned long lb_irq_int_9 : 1; /* RW, W1C */ + unsigned long lb_irq_int_10 : 1; /* RW, W1C */ + unsigned long lb_irq_int_11 : 1; /* RW, W1C */ + unsigned long lb_irq_int_12 : 1; /* RW, W1C */ + unsigned long lb_irq_int_13 : 1; /* RW, W1C */ + unsigned long lb_irq_int_14 : 1; /* RW, W1C */ + unsigned long lb_irq_int_15 : 1; /* RW, W1C */ + unsigned long l1_nmi_int : 1; /* RW, W1C */ + unsigned long stop_clock : 1; /* RW, W1C */ + unsigned long asic_to_l1 : 1; /* RW, W1C */ + unsigned long l1_to_asic : 1; /* RW, W1C */ + unsigned long ltc_int : 1; /* RW, W1C */ + unsigned long la_seq_trigger : 1; /* RW, W1C */ + unsigned long ipi_int : 1; /* RW, W1C */ + unsigned long extio_int0 : 1; /* RW, W1C */ + unsigned long extio_int1 : 1; /* RW, W1C */ + unsigned long extio_int2 : 1; /* RW, W1C */ + unsigned long extio_int3 : 1; /* RW, W1C */ + unsigned long profile_int : 1; /* RW, W1C */ + unsigned long rtc0 : 1; /* RW, W1C */ + unsigned long rtc1 : 1; /* RW, W1C */ + unsigned long rtc2 : 1; /* RW, W1C */ + unsigned long rtc3 : 1; /* RW, W1C */ + unsigned long bau_data : 1; /* RW, W1C */ + unsigned long power_management_req : 1; /* RW, W1C */ + unsigned long rsvd_57_63 : 7; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_EVENT_OCCURRED0_ALIAS */ +/* ========================================================================= */ +#define UVH_EVENT_OCCURRED0_ALIAS 0x0000000000070008UL +#define UVH_EVENT_OCCURRED0_ALIAS_32 0x005f0 + +/* ========================================================================= */ +/* UVH_GR0_TLB_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_GR0_TLB_INT0_CONFIG 0x61b00UL + +#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_GR0_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR0_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVH_GR0_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR0_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR0_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_GR0_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR0_TLB_INT0_CONFIG_P_SHFT 13 +#define UVH_GR0_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR0_TLB_INT0_CONFIG_T_SHFT 15 +#define UVH_GR0_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR0_TLB_INT0_CONFIG_M_SHFT 16 +#define UVH_GR0_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR0_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr0_tlb_int0_config_u { + unsigned long v; + struct uvh_gr0_tlb_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_GR0_TLB_INT1_CONFIG */ +/* ========================================================================= */ +#define UVH_GR0_TLB_INT1_CONFIG 0x61b40UL + +#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVH_GR0_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR0_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVH_GR0_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR0_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR0_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVH_GR0_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR0_TLB_INT1_CONFIG_P_SHFT 13 +#define UVH_GR0_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR0_TLB_INT1_CONFIG_T_SHFT 15 +#define UVH_GR0_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR0_TLB_INT1_CONFIG_M_SHFT 16 +#define UVH_GR0_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR0_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr0_tlb_int1_config_u { + unsigned long v; + struct uvh_gr0_tlb_int1_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_GR1_TLB_INT0_CONFIG */ +/* ========================================================================= */ +#define UVH_GR1_TLB_INT0_CONFIG 0x61f00UL + +#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_SHFT 0 +#define UVH_GR1_TLB_INT0_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR1_TLB_INT0_CONFIG_DM_SHFT 8 +#define UVH_GR1_TLB_INT0_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR1_TLB_INT0_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR1_TLB_INT0_CONFIG_STATUS_SHFT 12 +#define UVH_GR1_TLB_INT0_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR1_TLB_INT0_CONFIG_P_SHFT 13 +#define UVH_GR1_TLB_INT0_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR1_TLB_INT0_CONFIG_T_SHFT 15 +#define UVH_GR1_TLB_INT0_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR1_TLB_INT0_CONFIG_M_SHFT 16 +#define UVH_GR1_TLB_INT0_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR1_TLB_INT0_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr1_tlb_int0_config_u { + unsigned long v; + struct uvh_gr1_tlb_int0_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_GR1_TLB_INT1_CONFIG */ +/* ========================================================================= */ +#define UVH_GR1_TLB_INT1_CONFIG 0x61f40UL + +#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_SHFT 0 +#define UVH_GR1_TLB_INT1_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_GR1_TLB_INT1_CONFIG_DM_SHFT 8 +#define UVH_GR1_TLB_INT1_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_SHFT 11 +#define UVH_GR1_TLB_INT1_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_GR1_TLB_INT1_CONFIG_STATUS_SHFT 12 +#define UVH_GR1_TLB_INT1_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_GR1_TLB_INT1_CONFIG_P_SHFT 13 +#define UVH_GR1_TLB_INT1_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_GR1_TLB_INT1_CONFIG_T_SHFT 15 +#define UVH_GR1_TLB_INT1_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_GR1_TLB_INT1_CONFIG_M_SHFT 16 +#define UVH_GR1_TLB_INT1_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_SHFT 32 +#define UVH_GR1_TLB_INT1_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_gr1_tlb_int1_config_u { + unsigned long v; + struct uvh_gr1_tlb_int1_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPB */ +/* ========================================================================= */ +#define UVH_INT_CMPB 0x22080UL + +#define UVH_INT_CMPB_REAL_TIME_CMPB_SHFT 0 +#define UVH_INT_CMPB_REAL_TIME_CMPB_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpb_u { + unsigned long v; + struct uvh_int_cmpb_s { + unsigned long real_time_cmpb : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPC */ +/* ========================================================================= */ +#define UVH_INT_CMPC 0x22100UL + +#define UVH_INT_CMPC_REAL_TIME_CMPC_SHFT 0 +#define UVH_INT_CMPC_REAL_TIME_CMPC_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpc_u { + unsigned long v; + struct uvh_int_cmpc_s { + unsigned long real_time_cmpc : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_INT_CMPD */ +/* ========================================================================= */ +#define UVH_INT_CMPD 0x22180UL + +#define UVH_INT_CMPD_REAL_TIME_CMPD_SHFT 0 +#define UVH_INT_CMPD_REAL_TIME_CMPD_MASK 0x00ffffffffffffffUL + +union uvh_int_cmpd_u { + unsigned long v; + struct uvh_int_cmpd_s { + unsigned long real_time_cmpd : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_NODE_ID */ +/* ========================================================================= */ +#define UVH_NODE_ID 0x0UL + +#define UVH_NODE_ID_FORCE1_SHFT 0 +#define UVH_NODE_ID_FORCE1_MASK 0x0000000000000001UL +#define UVH_NODE_ID_MANUFACTURER_SHFT 1 +#define UVH_NODE_ID_MANUFACTURER_MASK 0x0000000000000ffeUL +#define UVH_NODE_ID_PART_NUMBER_SHFT 12 +#define UVH_NODE_ID_PART_NUMBER_MASK 0x000000000ffff000UL +#define UVH_NODE_ID_REVISION_SHFT 28 +#define UVH_NODE_ID_REVISION_MASK 0x00000000f0000000UL +#define UVH_NODE_ID_NODE_ID_SHFT 32 +#define UVH_NODE_ID_NODE_ID_MASK 0x00007fff00000000UL +#define UVH_NODE_ID_NODES_PER_BIT_SHFT 48 +#define UVH_NODE_ID_NODES_PER_BIT_MASK 0x007f000000000000UL +#define UVH_NODE_ID_NI_PORT_SHFT 56 +#define UVH_NODE_ID_NI_PORT_MASK 0x0f00000000000000UL + +union uvh_node_id_u { + unsigned long v; + struct uvh_node_id_s { + unsigned long force1 : 1; /* RO */ + unsigned long manufacturer : 11; /* RO */ + unsigned long part_number : 16; /* RO */ + unsigned long revision : 4; /* RO */ + unsigned long node_id : 15; /* RW */ + unsigned long rsvd_47 : 1; /* */ + unsigned long nodes_per_bit : 7; /* RW */ + unsigned long rsvd_55 : 1; /* */ + unsigned long ni_port : 4; /* RO */ + unsigned long rsvd_60_63 : 4; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR 0x16000d0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_0_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_0_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR 0x16000e0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_1_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_1_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR 0x16000f0UL + +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_SHFT 24 +#define UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR_DEST_BASE_MASK 0x00003fffff000000UL + +union uvh_rh_gam_alias210_redirect_config_2_mmr_u { + unsigned long v; + struct uvh_rh_gam_alias210_redirect_config_2_mmr_s { + unsigned long rsvd_0_23 : 24; /* */ + unsigned long dest_base : 22; /* RW */ + unsigned long rsvd_46_63: 18; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR 0x1600010UL + +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT 28 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffff0000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_SHFT 48 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_GR4_MASK 0x0001000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_SHFT 52 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_N_GRU_MASK 0x00f0000000000000UL +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_gru_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_gru_overlay_config_mmr_s { + unsigned long rsvd_0_27: 28; /* */ + unsigned long base : 18; /* RW */ + unsigned long rsvd_46_47: 2; /* */ + unsigned long gr4 : 1; /* RW */ + unsigned long rsvd_49_51: 3; /* */ + unsigned long n_gru : 4; /* RW */ + unsigned long rsvd_56_62: 7; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR */ +/* ========================================================================= */ +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR 0x1600028UL + +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT 26 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_MASK 0x00003ffffc000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_SHFT 46 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_DUAL_HUB_MASK 0x0000400000000000UL +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_SHFT 63 +#define UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_ENABLE_MASK 0x8000000000000000UL + +union uvh_rh_gam_mmr_overlay_config_mmr_u { + unsigned long v; + struct uvh_rh_gam_mmr_overlay_config_mmr_s { + unsigned long rsvd_0_25: 26; /* */ + unsigned long base : 20; /* RW */ + unsigned long dual_hub : 1; /* RW */ + unsigned long rsvd_47_62: 16; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC */ +/* ========================================================================= */ +#define UVH_RTC 0x340000UL + +#define UVH_RTC_REAL_TIME_CLOCK_SHFT 0 +#define UVH_RTC_REAL_TIME_CLOCK_MASK 0x00ffffffffffffffUL + +union uvh_rtc_u { + unsigned long v; + struct uvh_rtc_s { + unsigned long real_time_clock : 56; /* RW */ + unsigned long rsvd_56_63 : 8; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC1_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC1_INT_CONFIG 0x615c0UL + +#define UVH_RTC1_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC1_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC1_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC1_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC1_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC1_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC1_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC1_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC1_INT_CONFIG_P_SHFT 13 +#define UVH_RTC1_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC1_INT_CONFIG_T_SHFT 15 +#define UVH_RTC1_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC1_INT_CONFIG_M_SHFT 16 +#define UVH_RTC1_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC1_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC1_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc1_int_config_u { + unsigned long v; + struct uvh_rtc1_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC2_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC2_INT_CONFIG 0x61600UL + +#define UVH_RTC2_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC2_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC2_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC2_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC2_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC2_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC2_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC2_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC2_INT_CONFIG_P_SHFT 13 +#define UVH_RTC2_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC2_INT_CONFIG_T_SHFT 15 +#define UVH_RTC2_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC2_INT_CONFIG_M_SHFT 16 +#define UVH_RTC2_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC2_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC2_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc2_int_config_u { + unsigned long v; + struct uvh_rtc2_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC3_INT_CONFIG */ +/* ========================================================================= */ +#define UVH_RTC3_INT_CONFIG 0x61640UL + +#define UVH_RTC3_INT_CONFIG_VECTOR_SHFT 0 +#define UVH_RTC3_INT_CONFIG_VECTOR_MASK 0x00000000000000ffUL +#define UVH_RTC3_INT_CONFIG_DM_SHFT 8 +#define UVH_RTC3_INT_CONFIG_DM_MASK 0x0000000000000700UL +#define UVH_RTC3_INT_CONFIG_DESTMODE_SHFT 11 +#define UVH_RTC3_INT_CONFIG_DESTMODE_MASK 0x0000000000000800UL +#define UVH_RTC3_INT_CONFIG_STATUS_SHFT 12 +#define UVH_RTC3_INT_CONFIG_STATUS_MASK 0x0000000000001000UL +#define UVH_RTC3_INT_CONFIG_P_SHFT 13 +#define UVH_RTC3_INT_CONFIG_P_MASK 0x0000000000002000UL +#define UVH_RTC3_INT_CONFIG_T_SHFT 15 +#define UVH_RTC3_INT_CONFIG_T_MASK 0x0000000000008000UL +#define UVH_RTC3_INT_CONFIG_M_SHFT 16 +#define UVH_RTC3_INT_CONFIG_M_MASK 0x0000000000010000UL +#define UVH_RTC3_INT_CONFIG_APIC_ID_SHFT 32 +#define UVH_RTC3_INT_CONFIG_APIC_ID_MASK 0xffffffff00000000UL + +union uvh_rtc3_int_config_u { + unsigned long v; + struct uvh_rtc3_int_config_s { + unsigned long vector_ : 8; /* RW */ + unsigned long dm : 3; /* RW */ + unsigned long destmode : 1; /* RW */ + unsigned long status : 1; /* RO */ + unsigned long p : 1; /* RO */ + unsigned long rsvd_14 : 1; /* */ + unsigned long t : 1; /* RO */ + unsigned long m : 1; /* RW */ + unsigned long rsvd_17_31: 15; /* */ + unsigned long apic_id : 32; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_RTC_INC_RATIO */ +/* ========================================================================= */ +#define UVH_RTC_INC_RATIO 0x350000UL + +#define UVH_RTC_INC_RATIO_FRACTION_SHFT 0 +#define UVH_RTC_INC_RATIO_FRACTION_MASK 0x00000000000fffffUL +#define UVH_RTC_INC_RATIO_RATIO_SHFT 20 +#define UVH_RTC_INC_RATIO_RATIO_MASK 0x0000000000700000UL + +union uvh_rtc_inc_ratio_u { + unsigned long v; + struct uvh_rtc_inc_ratio_s { + unsigned long fraction : 20; /* RW */ + unsigned long ratio : 3; /* RW */ + unsigned long rsvd_23_63: 41; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ADDR_MAP_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ADDR_MAP_CONFIG 0xc80000UL + +#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_SHFT 0 +#define UVH_SI_ADDR_MAP_CONFIG_M_SKT_MASK 0x000000000000003fUL +#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_SHFT 8 +#define UVH_SI_ADDR_MAP_CONFIG_N_SKT_MASK 0x0000000000000f00UL + +union uvh_si_addr_map_config_u { + unsigned long v; + struct uvh_si_addr_map_config_s { + unsigned long m_skt : 6; /* RW */ + unsigned long rsvd_6_7: 2; /* */ + unsigned long n_skt : 4; /* RW */ + unsigned long rsvd_12_63: 52; /* */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS0_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS0_OVERLAY_CONFIG 0xc80008UL + +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS0_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias0_overlay_config_u { + unsigned long v; + struct uvh_si_alias0_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS1_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS1_OVERLAY_CONFIG 0xc80010UL + +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS1_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias1_overlay_config_u { + unsigned long v; + struct uvh_si_alias1_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + +/* ========================================================================= */ +/* UVH_SI_ALIAS2_OVERLAY_CONFIG */ +/* ========================================================================= */ +#define UVH_SI_ALIAS2_OVERLAY_CONFIG 0xc80018UL + +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_SHFT 24 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_BASE_MASK 0x00000000ff000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_SHFT 48 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_M_ALIAS_MASK 0x001f000000000000UL +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_SHFT 63 +#define UVH_SI_ALIAS2_OVERLAY_CONFIG_ENABLE_MASK 0x8000000000000000UL + +union uvh_si_alias2_overlay_config_u { + unsigned long v; + struct uvh_si_alias2_overlay_config_s { + unsigned long rsvd_0_23: 24; /* */ + unsigned long base : 8; /* RW */ + unsigned long rsvd_32_47: 16; /* */ + unsigned long m_alias : 5; /* RW */ + unsigned long rsvd_53_62: 10; /* */ + unsigned long enable : 1; /* RW */ + } s; +}; + + +#endif /* _ASM_IA64_UV_UV_MMRS_H */ diff --git a/arch/ia64/include/asm/vermagic.h b/arch/ia64/include/asm/vermagic.h new file mode 100644 index 000000000000..29c7424f4c25 --- /dev/null +++ b/arch/ia64/include/asm/vermagic.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#ifndef _ASM_VERMAGIC_H +#define _ASM_VERMAGIC_H + +#include + +#define MODULE_ARCH_VERMAGIC "ia64" \ + "gcc-" __stringify(__GNUC__) "." __stringify(__GNUC_MINOR__) + +#endif /* _ASM_VERMAGIC_H */ diff --git a/arch/ia64/include/asm/vga.h b/arch/ia64/include/asm/vga.h new file mode 100644 index 000000000000..64ce0b971a0a --- /dev/null +++ b/arch/ia64/include/asm/vga.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Access to VGA videoram + * + * (c) 1998 Martin Mares + * (c) 1999 Asit Mallick + * (c) 1999 Don Dugger + */ + +#ifndef __ASM_IA64_VGA_H_ +#define __ASM_IA64_VGA_H_ + +/* + * On the PC, we can just recalculate addresses and then access the + * videoram directly without any black magic. + */ + +extern unsigned long vga_console_iobase; +extern unsigned long vga_console_membase; + +#define VGA_MAP_MEM(x,s) ((unsigned long) ioremap(vga_console_membase + (x), s)) + +#define vga_readb(x) (*(x)) +#define vga_writeb(x,y) (*(y) = (x)) + +#endif /* __ASM_IA64_VGA_H_ */ diff --git a/arch/ia64/include/asm/vmalloc.h b/arch/ia64/include/asm/vmalloc.h new file mode 100644 index 000000000000..a2b51141ad28 --- /dev/null +++ b/arch/ia64/include/asm/vmalloc.h @@ -0,0 +1,4 @@ +#ifndef _ASM_IA64_VMALLOC_H +#define _ASM_IA64_VMALLOC_H + +#endif /* _ASM_IA64_VMALLOC_H */ diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h new file mode 100644 index 000000000000..6785f70d3208 --- /dev/null +++ b/arch/ia64/include/asm/xor.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Optimized RAID-5 checksumming functions for IA-64. + */ + + +extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); + +static struct xor_block_template xor_block_ia64 = { + .name = "ia64", + .do_2 = xor_ia64_2, + .do_3 = xor_ia64_3, + .do_4 = xor_ia64_4, + .do_5 = xor_ia64_5, +}; + +#define XOR_TRY_TEMPLATES xor_speed(&xor_block_ia64) diff --git a/arch/ia64/include/asm/xtp.h b/arch/ia64/include/asm/xtp.h new file mode 100644 index 000000000000..5bf1d70ad860 --- /dev/null +++ b/arch/ia64/include/asm/xtp.h @@ -0,0 +1,46 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_IA64_XTP_H +#define _ASM_IA64_XTP_H + +#include + +#ifdef CONFIG_SMP + +#define XTP_OFFSET 0x1e0008 + +#define SMP_IRQ_REDIRECTION (1 << 0) +#define SMP_IPI_REDIRECTION (1 << 1) + +extern unsigned char smp_int_redirect; + +/* + * XTP control functions: + * min_xtp : route all interrupts to this CPU + * normal_xtp: nominal XTP value + * max_xtp : never deliver interrupts to this CPU. + */ + +static inline void +min_xtp (void) +{ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + writeb(0x00, ipi_base_addr + XTP_OFFSET); /* XTP to min */ +} + +static inline void +normal_xtp (void) +{ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + writeb(0x08, ipi_base_addr + XTP_OFFSET); /* XTP normal */ +} + +static inline void +max_xtp (void) +{ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + writeb(0x0f, ipi_base_addr + XTP_OFFSET); /* Set XTP to max */ +} + +#endif /* CONFIG_SMP */ + +#endif /* _ASM_IA64_XTP_Hy */ diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild new file mode 100644 index 000000000000..3a1341e3535a --- /dev/null +++ b/arch/ia64/include/uapi/asm/Kbuild @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +generated-y += unistd_64.h diff --git a/arch/ia64/include/uapi/asm/auxvec.h b/arch/ia64/include/uapi/asm/auxvec.h new file mode 100644 index 000000000000..09969a5d2e0a --- /dev/null +++ b/arch/ia64/include/uapi/asm/auxvec.h @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_AUXVEC_H +#define _ASM_IA64_AUXVEC_H + +/* + * Architecture-neutral AT_ values are in the range 0-17. Leave some room for more of + * them, start the architecture-specific ones at 32. + */ +#define AT_SYSINFO 32 +#define AT_SYSINFO_EHDR 33 + +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ + +#endif /* _ASM_IA64_AUXVEC_H */ diff --git a/arch/ia64/include/uapi/asm/bitsperlong.h b/arch/ia64/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..1146d55563db --- /dev/null +++ b/arch/ia64/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_IA64_BITSPERLONG_H +#define __ASM_IA64_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include + +#endif /* __ASM_IA64_BITSPERLONG_H */ diff --git a/arch/ia64/include/uapi/asm/break.h b/arch/ia64/include/uapi/asm/break.h new file mode 100644 index 000000000000..4ca110f0a94b --- /dev/null +++ b/arch/ia64/include/uapi/asm/break.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_BREAK_H +#define _ASM_IA64_BREAK_H + +/* + * IA-64 Linux break numbers. + * + * Copyright (C) 1999 Hewlett-Packard Co + * Copyright (C) 1999 David Mosberger-Tang + */ + +/* + * OS-specific debug break numbers: + */ +#define __IA64_BREAK_KDB 0x80100 +#define __IA64_BREAK_KPROBE 0x81000 /* .. 0x81fff */ + +/* + * OS-specific break numbers: + */ +#define __IA64_BREAK_SYSCALL 0x100000 + +#endif /* _ASM_IA64_BREAK_H */ diff --git a/arch/ia64/include/uapi/asm/byteorder.h b/arch/ia64/include/uapi/asm/byteorder.h new file mode 100644 index 000000000000..f85d0faaaf34 --- /dev/null +++ b/arch/ia64/include/uapi/asm/byteorder.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_BYTEORDER_H +#define _ASM_IA64_BYTEORDER_H + +#include + +#endif /* _ASM_IA64_BYTEORDER_H */ diff --git a/arch/ia64/include/uapi/asm/cmpxchg.h b/arch/ia64/include/uapi/asm/cmpxchg.h new file mode 100644 index 000000000000..a59b5de6eec6 --- /dev/null +++ b/arch/ia64/include/uapi/asm/cmpxchg.h @@ -0,0 +1,138 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_IA64_CMPXCHG_H +#define _UAPI_ASM_IA64_CMPXCHG_H + +/* + * Compare/Exchange, forked from asm/intrinsics.h + * which was: + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#ifndef __ASSEMBLY__ + +#include +/* include compiler specific intrinsics */ +#include +#include + +/* + * This function doesn't exist, so you'll get a linker error if + * something tries to do an invalid xchg(). + */ +extern void ia64_xchg_called_with_bad_pointer(void); + +#define __arch_xchg(x, ptr, size) \ +({ \ + unsigned long __xchg_result; \ + \ + switch (size) { \ + case 1: \ + __xchg_result = ia64_xchg1((__u8 __force *)ptr, x); \ + break; \ + \ + case 2: \ + __xchg_result = ia64_xchg2((__u16 __force *)ptr, x); \ + break; \ + \ + case 4: \ + __xchg_result = ia64_xchg4((__u32 __force *)ptr, x); \ + break; \ + \ + case 8: \ + __xchg_result = ia64_xchg8((__u64 __force *)ptr, x); \ + break; \ + default: \ + ia64_xchg_called_with_bad_pointer(); \ + } \ + (__typeof__ (*(ptr)) __force) __xchg_result; \ +}) + +#ifndef __KERNEL__ +#define xchg(ptr, x) \ +({(__typeof__(*(ptr))) __arch_xchg((unsigned long) (x), (ptr), sizeof(*(ptr)));}) +#endif + +/* + * Atomic compare and exchange. Compare OLD with MEM, if identical, + * store NEW in MEM. Return the initial value in MEM. Success is + * indicated by comparing RETURN with OLD. + */ + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). + */ +extern long ia64_cmpxchg_called_with_bad_pointer(void); + +#define ia64_cmpxchg(sem, ptr, old, new, size) \ +({ \ + __u64 _o_, _r_; \ + \ + switch (size) { \ + case 1: \ + _o_ = (__u8) (long __force) (old); \ + break; \ + case 2: \ + _o_ = (__u16) (long __force) (old); \ + break; \ + case 4: \ + _o_ = (__u32) (long __force) (old); \ + break; \ + case 8: \ + _o_ = (__u64) (long __force) (old); \ + break; \ + default: \ + break; \ + } \ + switch (size) { \ + case 1: \ + _r_ = ia64_cmpxchg1_##sem((__u8 __force *) ptr, new, _o_); \ + break; \ + \ + case 2: \ + _r_ = ia64_cmpxchg2_##sem((__u16 __force *) ptr, new, _o_); \ + break; \ + \ + case 4: \ + _r_ = ia64_cmpxchg4_##sem((__u32 __force *) ptr, new, _o_); \ + break; \ + \ + case 8: \ + _r_ = ia64_cmpxchg8_##sem((__u64 __force *) ptr, new, _o_); \ + break; \ + \ + default: \ + _r_ = ia64_cmpxchg_called_with_bad_pointer(); \ + break; \ + } \ + (__typeof__(old) __force) _r_; \ +}) + +#define cmpxchg_acq(ptr, o, n) \ + ia64_cmpxchg(acq, (ptr), (o), (n), sizeof(*(ptr))) +#define cmpxchg_rel(ptr, o, n) \ + ia64_cmpxchg(rel, (ptr), (o), (n), sizeof(*(ptr))) + +/* + * Worse still - early processor implementations actually just ignored + * the acquire/release and did a full fence all the time. Unfortunately + * this meant a lot of badly written code that used .acq when they really + * wanted .rel became legacy out in the wild - so when we made a cpu + * that strictly did the .acq or .rel ... all that code started breaking - so + * we had to back-pedal and keep the "legacy" behavior of a full fence :-( + */ + +#ifndef __KERNEL__ +/* for compatibility with other platforms: */ +#define cmpxchg(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) +#define cmpxchg64(ptr, o, n) cmpxchg_acq((ptr), (o), (n)) + +#define cmpxchg_local cmpxchg +#define cmpxchg64_local cmpxchg64 +#endif + +#endif /* !__ASSEMBLY__ */ + +#endif /* _UAPI_ASM_IA64_CMPXCHG_H */ diff --git a/arch/ia64/include/uapi/asm/fcntl.h b/arch/ia64/include/uapi/asm/fcntl.h new file mode 100644 index 000000000000..7b95523efe5a --- /dev/null +++ b/arch/ia64/include/uapi/asm/fcntl.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_FCNTL_H +#define _ASM_IA64_FCNTL_H +/* + * Modified 1998-2000 + * David Mosberger-Tang , Hewlett-Packard Co. + */ + +#define force_o_largefile() \ + (personality(current->personality) != PER_LINUX32) + +#include +#include + +#endif /* _ASM_IA64_FCNTL_H */ diff --git a/arch/ia64/include/uapi/asm/fpu.h b/arch/ia64/include/uapi/asm/fpu.h new file mode 100644 index 000000000000..0df392982ce8 --- /dev/null +++ b/arch/ia64/include/uapi/asm/fpu.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_FPU_H +#define _ASM_IA64_FPU_H + +/* + * Copyright (C) 1998, 1999, 2002, 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include + +/* floating point status register: */ +#define FPSR_TRAP_VD (1 << 0) /* invalid op trap disabled */ +#define FPSR_TRAP_DD (1 << 1) /* denormal trap disabled */ +#define FPSR_TRAP_ZD (1 << 2) /* zero-divide trap disabled */ +#define FPSR_TRAP_OD (1 << 3) /* overflow trap disabled */ +#define FPSR_TRAP_UD (1 << 4) /* underflow trap disabled */ +#define FPSR_TRAP_ID (1 << 5) /* inexact trap disabled */ +#define FPSR_S0(x) ((x) << 6) +#define FPSR_S1(x) ((x) << 19) +#define FPSR_S2(x) (__IA64_UL(x) << 32) +#define FPSR_S3(x) (__IA64_UL(x) << 45) + +/* floating-point status field controls: */ +#define FPSF_FTZ (1 << 0) /* flush-to-zero */ +#define FPSF_WRE (1 << 1) /* widest-range exponent */ +#define FPSF_PC(x) (((x) & 0x3) << 2) /* precision control */ +#define FPSF_RC(x) (((x) & 0x3) << 4) /* rounding control */ +#define FPSF_TD (1 << 6) /* trap disabled */ + +/* floating-point status field flags: */ +#define FPSF_V (1 << 7) /* invalid operation flag */ +#define FPSF_D (1 << 8) /* denormal/unnormal operand flag */ +#define FPSF_Z (1 << 9) /* zero divide (IEEE) flag */ +#define FPSF_O (1 << 10) /* overflow (IEEE) flag */ +#define FPSF_U (1 << 11) /* underflow (IEEE) flag */ +#define FPSF_I (1 << 12) /* inexact (IEEE) flag) */ + +/* floating-point rounding control: */ +#define FPRC_NEAREST 0x0 +#define FPRC_NEGINF 0x1 +#define FPRC_POSINF 0x2 +#define FPRC_TRUNC 0x3 + +#define FPSF_DEFAULT (FPSF_PC (0x3) | FPSF_RC (FPRC_NEAREST)) + +/* This default value is the same as HP-UX uses. Don't change it + without a very good reason. */ +#define FPSR_DEFAULT (FPSR_TRAP_VD | FPSR_TRAP_DD | FPSR_TRAP_ZD \ + | FPSR_TRAP_OD | FPSR_TRAP_UD | FPSR_TRAP_ID \ + | FPSR_S0 (FPSF_DEFAULT) \ + | FPSR_S1 (FPSF_DEFAULT | FPSF_TD | FPSF_WRE) \ + | FPSR_S2 (FPSF_DEFAULT | FPSF_TD) \ + | FPSR_S3 (FPSF_DEFAULT | FPSF_TD)) + +# ifndef __ASSEMBLY__ + +struct ia64_fpreg { + union { + unsigned long bits[2]; + long double __dummy; /* force 16-byte alignment */ + } u; +}; + +# endif /* __ASSEMBLY__ */ + +#endif /* _ASM_IA64_FPU_H */ diff --git a/arch/ia64/include/uapi/asm/gcc_intrin.h b/arch/ia64/include/uapi/asm/gcc_intrin.h new file mode 100644 index 000000000000..ecfa3eadb217 --- /dev/null +++ b/arch/ia64/include/uapi/asm/gcc_intrin.h @@ -0,0 +1,619 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * + * Copyright (C) 2002,2003 Jun Nakajima + * Copyright (C) 2002,2003 Suresh Siddha + */ +#ifndef _UAPI_ASM_IA64_GCC_INTRIN_H +#define _UAPI_ASM_IA64_GCC_INTRIN_H + +#include +#include + +/* define this macro to get some asm stmts included in 'c' files */ +#define ASM_SUPPORTED + +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define ia64_barrier() asm volatile ("":::"memory") + +#define ia64_stop() asm volatile (";;"::) + +#define ia64_invala_gr(regnum) asm volatile ("invala.e r%0" :: "i"(regnum)) + +#define ia64_invala_fr(regnum) asm volatile ("invala.e f%0" :: "i"(regnum)) + +#define ia64_flushrs() asm volatile ("flushrs;;":::"memory") + +#define ia64_loadrs() asm volatile ("loadrs;;":::"memory") + +extern void ia64_bad_param_for_setreg (void); +extern void ia64_bad_param_for_getreg (void); + + +#define ia64_setreg(regnum, val) \ +({ \ + switch (regnum) { \ + case _IA64_REG_PSR_L: \ + asm volatile ("mov psr.l=%0" :: "r"(val) : "memory"); \ + break; \ + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ + asm volatile ("mov ar%0=%1" :: \ + "i" (regnum - _IA64_REG_AR_KR0), \ + "r"(val): "memory"); \ + break; \ + case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1: \ + asm volatile ("mov cr%0=%1" :: \ + "i" (regnum - _IA64_REG_CR_DCR), \ + "r"(val): "memory" ); \ + break; \ + case _IA64_REG_SP: \ + asm volatile ("mov r12=%0" :: \ + "r"(val): "memory"); \ + break; \ + case _IA64_REG_GP: \ + asm volatile ("mov gp=%0" :: "r"(val) : "memory"); \ + break; \ + default: \ + ia64_bad_param_for_setreg(); \ + break; \ + } \ +}) + +#define ia64_getreg(regnum) \ +({ \ + __u64 ia64_intri_res; \ + \ + switch (regnum) { \ + case _IA64_REG_GP: \ + asm volatile ("mov %0=gp" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_IP: \ + asm volatile ("mov %0=ip" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_PSR: \ + asm volatile ("mov %0=psr" : "=r"(ia64_intri_res)); \ + break; \ + case _IA64_REG_TP: /* for current() */ \ + ia64_intri_res = ia64_r13; \ + break; \ + case _IA64_REG_AR_KR0 ... _IA64_REG_AR_EC: \ + asm volatile ("mov %0=ar%1" : "=r" (ia64_intri_res) \ + : "i"(regnum - _IA64_REG_AR_KR0)); \ + break; \ + case _IA64_REG_CR_DCR ... _IA64_REG_CR_LRR1: \ + asm volatile ("mov %0=cr%1" : "=r" (ia64_intri_res) \ + : "i" (regnum - _IA64_REG_CR_DCR)); \ + break; \ + case _IA64_REG_SP: \ + asm volatile ("mov %0=sp" : "=r" (ia64_intri_res)); \ + break; \ + default: \ + ia64_bad_param_for_getreg(); \ + break; \ + } \ + ia64_intri_res; \ +}) + +#define ia64_hint_pause 0 + +#define ia64_hint(mode) \ +({ \ + switch (mode) { \ + case ia64_hint_pause: \ + asm volatile ("hint @pause" ::: "memory"); \ + break; \ + } \ +}) + + +/* Integer values for mux1 instruction */ +#define ia64_mux1_brcst 0 +#define ia64_mux1_mix 8 +#define ia64_mux1_shuf 9 +#define ia64_mux1_alt 10 +#define ia64_mux1_rev 11 + +#define ia64_mux1(x, mode) \ +({ \ + __u64 ia64_intri_res; \ + \ + switch (mode) { \ + case ia64_mux1_brcst: \ + asm ("mux1 %0=%1,@brcst" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_mix: \ + asm ("mux1 %0=%1,@mix" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_shuf: \ + asm ("mux1 %0=%1,@shuf" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_alt: \ + asm ("mux1 %0=%1,@alt" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + case ia64_mux1_rev: \ + asm ("mux1 %0=%1,@rev" : "=r" (ia64_intri_res) : "r" (x)); \ + break; \ + } \ + ia64_intri_res; \ +}) + +#if __GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# define ia64_popcnt(x) __builtin_popcountl(x) +#else +# define ia64_popcnt(x) \ + ({ \ + __u64 ia64_intri_res; \ + asm ("popcnt %0=%1" : "=r" (ia64_intri_res) : "r" (x)); \ + \ + ia64_intri_res; \ + }) +#endif + +#define ia64_getf_exp(x) \ +({ \ + long ia64_intri_res; \ + \ + asm ("getf.exp %0=%1" : "=r"(ia64_intri_res) : "f"(x)); \ + \ + ia64_intri_res; \ +}) + +#define ia64_shrp(a, b, count) \ +({ \ + __u64 ia64_intri_res; \ + asm ("shrp %0=%1,%2,%3" : "=r"(ia64_intri_res) : "r"(a), "r"(b), "i"(count)); \ + ia64_intri_res; \ +}) + +#define ia64_ldfs(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfs %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldfd(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfd %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldfe(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldfe %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldf8(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldf8 %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_ldf_fill(regnum, x) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x)); \ +}) + +#define ia64_st4_rel_nta(m, val) \ +({ \ + asm volatile ("st4.rel.nta [%0] = %1\n\t" :: "r"(m), "r"(val)); \ +}) + +#define ia64_stfs(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfs [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stfd(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfd [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stfe(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stfe [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stf8(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stf8 [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_stf_spill(x, regnum) \ +({ \ + register double __f__ asm ("f"#regnum); \ + asm volatile ("stf.spill [%0]=%1" :: "r"(x), "f"(__f__) : "memory"); \ +}) + +#define ia64_fetchadd4_acq(p, inc) \ +({ \ + \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd4.acq %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd4_rel(p, inc) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd4.rel %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd8_acq(p, inc) \ +({ \ + \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd8.acq %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_fetchadd8_rel(p, inc) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("fetchadd8.rel %0=[%1],%2" \ + : "=r"(ia64_intri_res) : "r"(p), "i" (inc) \ + : "memory"); \ + \ + ia64_intri_res; \ +}) + +#define ia64_xchg1(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg1 %0=[%1],%2" \ + : "=r" (ia64_intri_res) : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg2(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg2 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg4(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg4 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_xchg8(ptr,x) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("xchg8 %0=[%1],%2" : "=r" (ia64_intri_res) \ + : "r" (ptr), "r" (x) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg1_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg1.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg1_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg1.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg2_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg2.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg2_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + \ + asm volatile ("cmpxchg2.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg4_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg4_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg4.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg8_acq(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + asm volatile ("cmpxchg8.acq %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_cmpxchg8_rel(ptr, new, old) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("mov ar.ccv=%0;;" :: "rO"(old)); \ + \ + asm volatile ("cmpxchg8.rel %0=[%1],%2,ar.ccv": \ + "=r"(ia64_intri_res) : "r"(ptr), "r"(new) : "memory"); \ + ia64_intri_res; \ +}) + +#define ia64_mf() asm volatile ("mf" ::: "memory") +#define ia64_mfa() asm volatile ("mf.a" ::: "memory") + +#define ia64_invala() asm volatile ("invala" ::: "memory") + +#define ia64_thash(addr) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("thash %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + +#define ia64_srlz_i() asm volatile (";; srlz.i ;;" ::: "memory") +#define ia64_srlz_d() asm volatile (";; srlz.d" ::: "memory"); + +#ifdef HAVE_SERIALIZE_DIRECTIVE +# define ia64_dv_serialize_data() asm volatile (".serialize.data"); +# define ia64_dv_serialize_instruction() asm volatile (".serialize.instruction"); +#else +# define ia64_dv_serialize_data() +# define ia64_dv_serialize_instruction() +#endif + +#define ia64_nop(x) asm volatile ("nop %0"::"i"(x)); + +#define ia64_itci(addr) asm volatile ("itc.i %0;;" :: "r"(addr) : "memory") + +#define ia64_itcd(addr) asm volatile ("itc.d %0;;" :: "r"(addr) : "memory") + + +#define ia64_itri(trnum, addr) asm volatile ("itr.i itr[%0]=%1" \ + :: "r"(trnum), "r"(addr) : "memory") + +#define ia64_itrd(trnum, addr) asm volatile ("itr.d dtr[%0]=%1" \ + :: "r"(trnum), "r"(addr) : "memory") + +#define ia64_tpa(addr) \ +({ \ + unsigned long ia64_pa; \ + asm volatile ("tpa %0 = %1" : "=r"(ia64_pa) : "r"(addr) : "memory"); \ + ia64_pa; \ +}) + +#define __ia64_set_dbr(index, val) \ + asm volatile ("mov dbr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_ibr(index, val) \ + asm volatile ("mov ibr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pkr(index, val) \ + asm volatile ("mov pkr[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pmc(index, val) \ + asm volatile ("mov pmc[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_pmd(index, val) \ + asm volatile ("mov pmd[%0]=%1" :: "r"(index), "r"(val) : "memory") + +#define ia64_set_rr(index, val) \ + asm volatile ("mov rr[%0]=%1" :: "r"(index), "r"(val) : "memory"); + +#define ia64_get_cpuid(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=cpuid[%r1]" : "=r"(ia64_intri_res) : "rO"(index)); \ + ia64_intri_res; \ +}) + +#define __ia64_get_dbr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=dbr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_ibr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=ibr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_pkr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=pkr[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_pmc(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=pmc[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + + +#define ia64_get_pmd(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=pmd[%1]" : "=r"(ia64_intri_res) : "r"(index)); \ + ia64_intri_res; \ +}) + +#define ia64_get_rr(index) \ +({ \ + unsigned long ia64_intri_res; \ + asm volatile ("mov %0=rr[%1]" : "=r"(ia64_intri_res) : "r" (index)); \ + ia64_intri_res; \ +}) + +#define ia64_fc(addr) asm volatile ("fc %0" :: "r"(addr) : "memory") + + +#define ia64_sync_i() asm volatile (";; sync.i" ::: "memory") + +#define ia64_ssm(mask) asm volatile ("ssm %0":: "i"((mask)) : "memory") +#define ia64_rsm(mask) asm volatile ("rsm %0":: "i"((mask)) : "memory") +#define ia64_sum(mask) asm volatile ("sum %0":: "i"((mask)) : "memory") +#define ia64_rum(mask) asm volatile ("rum %0":: "i"((mask)) : "memory") + +#define ia64_ptce(addr) asm volatile ("ptc.e %0" :: "r"(addr)) + +#define ia64_ptcga(addr, size) \ +do { \ + asm volatile ("ptc.ga %0,%1" :: "r"(addr), "r"(size) : "memory"); \ + ia64_dv_serialize_data(); \ +} while (0) + +#define ia64_ptcl(addr, size) \ +do { \ + asm volatile ("ptc.l %0,%1" :: "r"(addr), "r"(size) : "memory"); \ + ia64_dv_serialize_data(); \ +} while (0) + +#define ia64_ptri(addr, size) \ + asm volatile ("ptr.i %0,%1" :: "r"(addr), "r"(size) : "memory") + +#define ia64_ptrd(addr, size) \ + asm volatile ("ptr.d %0,%1" :: "r"(addr), "r"(size) : "memory") + +#define ia64_ttag(addr) \ +({ \ + __u64 ia64_intri_res; \ + asm volatile ("ttag %0=%1" : "=r"(ia64_intri_res) : "r" (addr)); \ + ia64_intri_res; \ +}) + + +/* Values for lfhint in ia64_lfetch and ia64_lfetch_fault */ + +#define ia64_lfhint_none 0 +#define ia64_lfhint_nt1 1 +#define ia64_lfhint_nt2 2 +#define ia64_lfhint_nta 3 + +#define ia64_lfetch(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.nt1 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.nt2 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.nta [%0]" : : "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_excl(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.excl [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.excl.nt1 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.excl.nt2 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.excl.nta [%0]" :: "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_fault(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.fault [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.fault.nt1 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.fault.nt2 [%0]" : : "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.fault.nta [%0]" : : "r"(y)); \ + break; \ + } \ +}) + +#define ia64_lfetch_fault_excl(lfhint, y) \ +({ \ + switch (lfhint) { \ + case ia64_lfhint_none: \ + asm volatile ("lfetch.fault.excl [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt1: \ + asm volatile ("lfetch.fault.excl.nt1 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nt2: \ + asm volatile ("lfetch.fault.excl.nt2 [%0]" :: "r"(y)); \ + break; \ + case ia64_lfhint_nta: \ + asm volatile ("lfetch.fault.excl.nta [%0]" :: "r"(y)); \ + break; \ + } \ +}) + +#define ia64_intrin_local_irq_restore(x) \ +do { \ + asm volatile (";; cmp.ne p6,p7=%0,r0;;" \ + "(p6) ssm psr.i;" \ + "(p7) rsm psr.i;;" \ + "(p6) srlz.d" \ + :: "r"((x)) : "p6", "p7", "memory"); \ +} while (0) + +#endif /* _UAPI_ASM_IA64_GCC_INTRIN_H */ diff --git a/arch/ia64/include/uapi/asm/ia64regs.h b/arch/ia64/include/uapi/asm/ia64regs.h new file mode 100644 index 000000000000..d7d10cec8b9f --- /dev/null +++ b/arch/ia64/include/uapi/asm/ia64regs.h @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2002,2003 Intel Corp. + * Jun Nakajima + * Suresh Siddha + */ + +#ifndef _ASM_IA64_IA64REGS_H +#define _ASM_IA64_IA64REGS_H + +/* + * Register Names for getreg() and setreg(). + * + * The "magic" numbers happen to match the values used by the Intel compiler's + * getreg()/setreg() intrinsics. + */ + +/* Special Registers */ + +#define _IA64_REG_IP 1016 /* getreg only */ +#define _IA64_REG_PSR 1019 +#define _IA64_REG_PSR_L 1019 + +/* General Integer Registers */ + +#define _IA64_REG_GP 1025 /* R1 */ +#define _IA64_REG_R8 1032 /* R8 */ +#define _IA64_REG_R9 1033 /* R9 */ +#define _IA64_REG_SP 1036 /* R12 */ +#define _IA64_REG_TP 1037 /* R13 */ + +/* Application Registers */ + +#define _IA64_REG_AR_KR0 3072 +#define _IA64_REG_AR_KR1 3073 +#define _IA64_REG_AR_KR2 3074 +#define _IA64_REG_AR_KR3 3075 +#define _IA64_REG_AR_KR4 3076 +#define _IA64_REG_AR_KR5 3077 +#define _IA64_REG_AR_KR6 3078 +#define _IA64_REG_AR_KR7 3079 +#define _IA64_REG_AR_RSC 3088 +#define _IA64_REG_AR_BSP 3089 +#define _IA64_REG_AR_BSPSTORE 3090 +#define _IA64_REG_AR_RNAT 3091 +#define _IA64_REG_AR_FCR 3093 +#define _IA64_REG_AR_EFLAG 3096 +#define _IA64_REG_AR_CSD 3097 +#define _IA64_REG_AR_SSD 3098 +#define _IA64_REG_AR_CFLAG 3099 +#define _IA64_REG_AR_FSR 3100 +#define _IA64_REG_AR_FIR 3101 +#define _IA64_REG_AR_FDR 3102 +#define _IA64_REG_AR_CCV 3104 +#define _IA64_REG_AR_UNAT 3108 +#define _IA64_REG_AR_FPSR 3112 +#define _IA64_REG_AR_ITC 3116 +#define _IA64_REG_AR_PFS 3136 +#define _IA64_REG_AR_LC 3137 +#define _IA64_REG_AR_EC 3138 + +/* Control Registers */ + +#define _IA64_REG_CR_DCR 4096 +#define _IA64_REG_CR_ITM 4097 +#define _IA64_REG_CR_IVA 4098 +#define _IA64_REG_CR_PTA 4104 +#define _IA64_REG_CR_IPSR 4112 +#define _IA64_REG_CR_ISR 4113 +#define _IA64_REG_CR_IIP 4115 +#define _IA64_REG_CR_IFA 4116 +#define _IA64_REG_CR_ITIR 4117 +#define _IA64_REG_CR_IIPA 4118 +#define _IA64_REG_CR_IFS 4119 +#define _IA64_REG_CR_IIM 4120 +#define _IA64_REG_CR_IHA 4121 +#define _IA64_REG_CR_LID 4160 +#define _IA64_REG_CR_IVR 4161 /* getreg only */ +#define _IA64_REG_CR_TPR 4162 +#define _IA64_REG_CR_EOI 4163 +#define _IA64_REG_CR_IRR0 4164 /* getreg only */ +#define _IA64_REG_CR_IRR1 4165 /* getreg only */ +#define _IA64_REG_CR_IRR2 4166 /* getreg only */ +#define _IA64_REG_CR_IRR3 4167 /* getreg only */ +#define _IA64_REG_CR_ITV 4168 +#define _IA64_REG_CR_PMV 4169 +#define _IA64_REG_CR_CMCV 4170 +#define _IA64_REG_CR_LRR0 4176 +#define _IA64_REG_CR_LRR1 4177 + +/* Indirect Registers for getindreg() and setindreg() */ + +#define _IA64_REG_INDR_CPUID 9000 /* getindreg only */ +#define _IA64_REG_INDR_DBR 9001 +#define _IA64_REG_INDR_IBR 9002 +#define _IA64_REG_INDR_PKR 9003 +#define _IA64_REG_INDR_PMC 9004 +#define _IA64_REG_INDR_PMD 9005 +#define _IA64_REG_INDR_RR 9006 + +#endif /* _ASM_IA64_IA64REGS_H */ diff --git a/arch/ia64/include/uapi/asm/intrinsics.h b/arch/ia64/include/uapi/asm/intrinsics.h new file mode 100644 index 000000000000..63f27c4ec739 --- /dev/null +++ b/arch/ia64/include/uapi/asm/intrinsics.h @@ -0,0 +1,82 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Compiler-dependent intrinsics. + * + * Copyright (C) 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _UAPI_ASM_IA64_INTRINSICS_H +#define _UAPI_ASM_IA64_INTRINSICS_H + + +#ifndef __ASSEMBLY__ + +#include +/* include compiler specific intrinsics */ +#include +#include +#include + +#define ia64_set_rr0_to_rr4(val0, val1, val2, val3, val4) \ +do { \ + ia64_set_rr(0x0000000000000000UL, (val0)); \ + ia64_set_rr(0x2000000000000000UL, (val1)); \ + ia64_set_rr(0x4000000000000000UL, (val2)); \ + ia64_set_rr(0x6000000000000000UL, (val3)); \ + ia64_set_rr(0x8000000000000000UL, (val4)); \ +} while (0) + +/* + * Force an unresolved reference if someone tries to use + * ia64_fetch_and_add() with a bad value. + */ +extern unsigned long __bad_size_for_ia64_fetch_and_add (void); +extern unsigned long __bad_increment_for_ia64_fetch_and_add (void); + +#define IA64_FETCHADD(tmp,v,n,sz,sem) \ +({ \ + switch (sz) { \ + case 4: \ + tmp = ia64_fetchadd4_##sem((unsigned int *) v, n); \ + break; \ + \ + case 8: \ + tmp = ia64_fetchadd8_##sem((unsigned long *) v, n); \ + break; \ + \ + default: \ + __bad_size_for_ia64_fetch_and_add(); \ + } \ +}) + +#define ia64_fetchadd(i,v,sem) \ +({ \ + __u64 _tmp; \ + volatile __typeof__(*(v)) *_v = (v); \ + /* Can't use a switch () here: gcc isn't always smart enough for that... */ \ + if ((i) == -16) \ + IA64_FETCHADD(_tmp, _v, -16, sizeof(*(v)), sem); \ + else if ((i) == -8) \ + IA64_FETCHADD(_tmp, _v, -8, sizeof(*(v)), sem); \ + else if ((i) == -4) \ + IA64_FETCHADD(_tmp, _v, -4, sizeof(*(v)), sem); \ + else if ((i) == -1) \ + IA64_FETCHADD(_tmp, _v, -1, sizeof(*(v)), sem); \ + else if ((i) == 1) \ + IA64_FETCHADD(_tmp, _v, 1, sizeof(*(v)), sem); \ + else if ((i) == 4) \ + IA64_FETCHADD(_tmp, _v, 4, sizeof(*(v)), sem); \ + else if ((i) == 8) \ + IA64_FETCHADD(_tmp, _v, 8, sizeof(*(v)), sem); \ + else if ((i) == 16) \ + IA64_FETCHADD(_tmp, _v, 16, sizeof(*(v)), sem); \ + else \ + _tmp = __bad_increment_for_ia64_fetch_and_add(); \ + (__typeof__(*(v))) (_tmp); /* return old value */ \ +}) + +#define ia64_fetch_and_add(i,v) (ia64_fetchadd(i, v, rel) + (i)) /* return new value */ + +#endif + +#endif /* _UAPI_ASM_IA64_INTRINSICS_H */ diff --git a/arch/ia64/include/uapi/asm/mman.h b/arch/ia64/include/uapi/asm/mman.h new file mode 100644 index 000000000000..ce0cc3d7509e --- /dev/null +++ b/arch/ia64/include/uapi/asm/mman.h @@ -0,0 +1,17 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Based on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_MMAN_H +#define _UAPI_ASM_IA64_MMAN_H + + +#include + +#define MAP_GROWSUP 0x0200 /* register stack-like segment */ + + +#endif /* _UAPI_ASM_IA64_MMAN_H */ diff --git a/arch/ia64/include/uapi/asm/param.h b/arch/ia64/include/uapi/asm/param.h new file mode 100644 index 000000000000..123ab45940b4 --- /dev/null +++ b/arch/ia64/include/uapi/asm/param.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Fundamental kernel parameters. + * + * Based on . + * + * Modified 1998, 1999, 2002-2003 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_PARAM_H +#define _UAPI_ASM_IA64_PARAM_H + + +#define EXEC_PAGESIZE 65536 + +#ifndef NOGROUP +# define NOGROUP (-1) +#endif + +#define MAXHOSTNAMELEN 64 /* max length of hostname */ + +#ifndef __KERNEL__ + /* + * Technically, this is wrong, but some old apps still refer to it. The proper way to + * get the HZ value is via sysconf(_SC_CLK_TCK). + */ +# define HZ 1024 +#endif + +#endif /* _UAPI_ASM_IA64_PARAM_H */ diff --git a/arch/ia64/include/uapi/asm/posix_types.h b/arch/ia64/include/uapi/asm/posix_types.h new file mode 100644 index 000000000000..bded40f7defe --- /dev/null +++ b/arch/ia64/include/uapi/asm/posix_types.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_POSIX_TYPES_H +#define _ASM_IA64_POSIX_TYPES_H + +typedef unsigned long __kernel_sigset_t; /* at least 32 bits */ + +#include + +#endif /* _ASM_IA64_POSIX_TYPES_H */ diff --git a/arch/ia64/include/uapi/asm/ptrace.h b/arch/ia64/include/uapi/asm/ptrace.h new file mode 100644 index 000000000000..f52655b44414 --- /dev/null +++ b/arch/ia64/include/uapi/asm/ptrace.h @@ -0,0 +1,248 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 1998-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2003 Intel Co + * Suresh Siddha + * Fenghua Yu + * Arun Sharma + * + * 12/07/98 S. Eranian added pt_regs & switch_stack + * 12/21/98 D. Mosberger updated to match latest code + * 6/17/99 D. Mosberger added second unat member to "struct switch_stack" + * + */ +#ifndef _UAPI_ASM_IA64_PTRACE_H +#define _UAPI_ASM_IA64_PTRACE_H + +/* + * When a user process is blocked, its state looks as follows: + * + * +----------------------+ ------- IA64_STK_OFFSET + * | | ^ + * | struct pt_regs | | + * | | | + * +----------------------+ | + * | | | + * | memory stack | | + * | (growing downwards) | | + * //.....................// | + * | + * //.....................// | + * | | | + * +----------------------+ | + * | struct switch_stack | | + * | | | + * +----------------------+ | + * | | | + * //.....................// | + * | + * //.....................// | + * | | | + * | register stack | | + * | (growing upwards) | | + * | | | + * +----------------------+ | --- IA64_RBS_OFFSET + * | struct thread_info | | ^ + * +----------------------+ | | + * | | | | + * | struct task_struct | | | + * current -> | | | | + * +----------------------+ ------- + * + * Note that ar.ec is not saved explicitly in pt_reg or switch_stack. + * This is because ar.ec is saved as part of ar.pfs. + */ + + +#include + + +#ifndef __ASSEMBLY__ + +/* + * This struct defines the way the registers are saved on system + * calls. + * + * We don't save all floating point register because the kernel + * is compiled to use only a very small subset, so the other are + * untouched. + * + * THIS STRUCTURE MUST BE A MULTIPLE 16-BYTE IN SIZE + * (because the memory stack pointer MUST ALWAYS be aligned this way) + * + */ +struct pt_regs { + /* The following registers are saved by SAVE_MIN: */ + unsigned long b6; /* scratch */ + unsigned long b7; /* scratch */ + + unsigned long ar_csd; /* used by cmp8xchg16 (scratch) */ + unsigned long ar_ssd; /* reserved for future use (scratch) */ + + unsigned long r8; /* scratch (return value register 0) */ + unsigned long r9; /* scratch (return value register 1) */ + unsigned long r10; /* scratch (return value register 2) */ + unsigned long r11; /* scratch (return value register 3) */ + + unsigned long cr_ipsr; /* interrupted task's psr */ + unsigned long cr_iip; /* interrupted task's instruction pointer */ + /* + * interrupted task's function state; if bit 63 is cleared, it + * contains syscall's ar.pfs.pfm: + */ + unsigned long cr_ifs; + + unsigned long ar_unat; /* interrupted task's NaT register (preserved) */ + unsigned long ar_pfs; /* prev function state */ + unsigned long ar_rsc; /* RSE configuration */ + /* The following two are valid only if cr_ipsr.cpl > 0 || ti->flags & _TIF_MCA_INIT */ + unsigned long ar_rnat; /* RSE NaT */ + unsigned long ar_bspstore; /* RSE bspstore */ + + unsigned long pr; /* 64 predicate registers (1 bit each) */ + unsigned long b0; /* return pointer (bp) */ + unsigned long loadrs; /* size of dirty partition << 16 */ + + unsigned long r1; /* the gp pointer */ + unsigned long r12; /* interrupted task's memory stack pointer */ + unsigned long r13; /* thread pointer */ + + unsigned long ar_fpsr; /* floating point status (preserved) */ + unsigned long r15; /* scratch */ + + /* The remaining registers are NOT saved for system calls. */ + + unsigned long r14; /* scratch */ + unsigned long r2; /* scratch */ + unsigned long r3; /* scratch */ + + /* The following registers are saved by SAVE_REST: */ + unsigned long r16; /* scratch */ + unsigned long r17; /* scratch */ + unsigned long r18; /* scratch */ + unsigned long r19; /* scratch */ + unsigned long r20; /* scratch */ + unsigned long r21; /* scratch */ + unsigned long r22; /* scratch */ + unsigned long r23; /* scratch */ + unsigned long r24; /* scratch */ + unsigned long r25; /* scratch */ + unsigned long r26; /* scratch */ + unsigned long r27; /* scratch */ + unsigned long r28; /* scratch */ + unsigned long r29; /* scratch */ + unsigned long r30; /* scratch */ + unsigned long r31; /* scratch */ + + unsigned long ar_ccv; /* compare/exchange value (scratch) */ + + /* + * Floating point registers that the kernel considers scratch: + */ + struct ia64_fpreg f6; /* scratch */ + struct ia64_fpreg f7; /* scratch */ + struct ia64_fpreg f8; /* scratch */ + struct ia64_fpreg f9; /* scratch */ + struct ia64_fpreg f10; /* scratch */ + struct ia64_fpreg f11; /* scratch */ +}; + +/* + * This structure contains the addition registers that need to + * preserved across a context switch. This generally consists of + * "preserved" registers. + */ +struct switch_stack { + unsigned long caller_unat; /* user NaT collection register (preserved) */ + unsigned long ar_fpsr; /* floating-point status register */ + + struct ia64_fpreg f2; /* preserved */ + struct ia64_fpreg f3; /* preserved */ + struct ia64_fpreg f4; /* preserved */ + struct ia64_fpreg f5; /* preserved */ + + struct ia64_fpreg f12; /* scratch, but untouched by kernel */ + struct ia64_fpreg f13; /* scratch, but untouched by kernel */ + struct ia64_fpreg f14; /* scratch, but untouched by kernel */ + struct ia64_fpreg f15; /* scratch, but untouched by kernel */ + struct ia64_fpreg f16; /* preserved */ + struct ia64_fpreg f17; /* preserved */ + struct ia64_fpreg f18; /* preserved */ + struct ia64_fpreg f19; /* preserved */ + struct ia64_fpreg f20; /* preserved */ + struct ia64_fpreg f21; /* preserved */ + struct ia64_fpreg f22; /* preserved */ + struct ia64_fpreg f23; /* preserved */ + struct ia64_fpreg f24; /* preserved */ + struct ia64_fpreg f25; /* preserved */ + struct ia64_fpreg f26; /* preserved */ + struct ia64_fpreg f27; /* preserved */ + struct ia64_fpreg f28; /* preserved */ + struct ia64_fpreg f29; /* preserved */ + struct ia64_fpreg f30; /* preserved */ + struct ia64_fpreg f31; /* preserved */ + + unsigned long r4; /* preserved */ + unsigned long r5; /* preserved */ + unsigned long r6; /* preserved */ + unsigned long r7; /* preserved */ + + unsigned long b0; /* so we can force a direct return in copy_thread */ + unsigned long b1; + unsigned long b2; + unsigned long b3; + unsigned long b4; + unsigned long b5; + + unsigned long ar_pfs; /* previous function state */ + unsigned long ar_lc; /* loop counter (preserved) */ + unsigned long ar_unat; /* NaT bits for r4-r7 */ + unsigned long ar_rnat; /* RSE NaT collection register */ + unsigned long ar_bspstore; /* RSE dirty base (preserved) */ + unsigned long pr; /* 64 predicate registers (1 bit each) */ +}; + + +/* pt_all_user_regs is used for PTRACE_GETREGS PTRACE_SETREGS */ +struct pt_all_user_regs { + unsigned long nat; + unsigned long cr_iip; + unsigned long cfm; + unsigned long cr_ipsr; + unsigned long pr; + + unsigned long gr[32]; + unsigned long br[8]; + unsigned long ar[128]; + struct ia64_fpreg fr[128]; +}; + +#endif /* !__ASSEMBLY__ */ + +/* indices to application-registers array in pt_all_user_regs */ +#define PT_AUR_RSC 16 +#define PT_AUR_BSP 17 +#define PT_AUR_BSPSTORE 18 +#define PT_AUR_RNAT 19 +#define PT_AUR_CCV 32 +#define PT_AUR_UNAT 36 +#define PT_AUR_FPSR 40 +#define PT_AUR_PFS 64 +#define PT_AUR_LC 65 +#define PT_AUR_EC 66 + +/* + * The numbers chosen here are somewhat arbitrary but absolutely MUST + * not overlap with any of the number assigned in . + */ +#define PTRACE_SINGLEBLOCK 12 /* resume execution until next branch */ +#define PTRACE_OLD_GETSIGINFO 13 /* (replaced by PTRACE_GETSIGINFO in ) */ +#define PTRACE_OLD_SETSIGINFO 14 /* (replaced by PTRACE_SETSIGINFO in ) */ +#define PTRACE_GETREGS 18 /* get all registers (pt_all_user_regs) in one shot */ +#define PTRACE_SETREGS 19 /* set all registers (pt_all_user_regs) in one shot */ + +#define PTRACE_OLDSETOPTIONS 21 + +#endif /* _UAPI_ASM_IA64_PTRACE_H */ diff --git a/arch/ia64/include/uapi/asm/ptrace_offsets.h b/arch/ia64/include/uapi/asm/ptrace_offsets.h new file mode 100644 index 000000000000..2847c18139ef --- /dev/null +++ b/arch/ia64/include/uapi/asm/ptrace_offsets.h @@ -0,0 +1,269 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_PTRACE_OFFSETS_H +#define _ASM_IA64_PTRACE_OFFSETS_H + +/* + * Copyright (C) 1999, 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +/* + * The "uarea" that can be accessed via PEEKUSER and POKEUSER is a + * virtual structure that would have the following definition: + * + * struct uarea { + * struct ia64_fpreg fph[96]; // f32-f127 + * unsigned long nat_bits; + * unsigned long empty1; + * struct ia64_fpreg f2; // f2-f5 + * : + * struct ia64_fpreg f5; + * struct ia64_fpreg f10; // f10-f31 + * : + * struct ia64_fpreg f31; + * unsigned long r4; // r4-r7 + * : + * unsigned long r7; + * unsigned long b1; // b1-b5 + * : + * unsigned long b5; + * unsigned long ar_ec; + * unsigned long ar_lc; + * unsigned long empty2[5]; + * unsigned long cr_ipsr; + * unsigned long cr_iip; + * unsigned long cfm; + * unsigned long ar_unat; + * unsigned long ar_pfs; + * unsigned long ar_rsc; + * unsigned long ar_rnat; + * unsigned long ar_bspstore; + * unsigned long pr; + * unsigned long b6; + * unsigned long ar_bsp; + * unsigned long r1; + * unsigned long r2; + * unsigned long r3; + * unsigned long r12; + * unsigned long r13; + * unsigned long r14; + * unsigned long r15; + * unsigned long r8; + * unsigned long r9; + * unsigned long r10; + * unsigned long r11; + * unsigned long r16; + * : + * unsigned long r31; + * unsigned long ar_ccv; + * unsigned long ar_fpsr; + * unsigned long b0; + * unsigned long b7; + * unsigned long f6; + * unsigned long f7; + * unsigned long f8; + * unsigned long f9; + * unsigned long ar_csd; + * unsigned long ar_ssd; + * unsigned long rsvd1[710]; + * unsigned long dbr[8]; + * unsigned long rsvd2[504]; + * unsigned long ibr[8]; + * unsigned long rsvd3[504]; + * unsigned long pmd[4]; + * } + */ + +/* fph: */ +#define PT_F32 0x0000 +#define PT_F33 0x0010 +#define PT_F34 0x0020 +#define PT_F35 0x0030 +#define PT_F36 0x0040 +#define PT_F37 0x0050 +#define PT_F38 0x0060 +#define PT_F39 0x0070 +#define PT_F40 0x0080 +#define PT_F41 0x0090 +#define PT_F42 0x00a0 +#define PT_F43 0x00b0 +#define PT_F44 0x00c0 +#define PT_F45 0x00d0 +#define PT_F46 0x00e0 +#define PT_F47 0x00f0 +#define PT_F48 0x0100 +#define PT_F49 0x0110 +#define PT_F50 0x0120 +#define PT_F51 0x0130 +#define PT_F52 0x0140 +#define PT_F53 0x0150 +#define PT_F54 0x0160 +#define PT_F55 0x0170 +#define PT_F56 0x0180 +#define PT_F57 0x0190 +#define PT_F58 0x01a0 +#define PT_F59 0x01b0 +#define PT_F60 0x01c0 +#define PT_F61 0x01d0 +#define PT_F62 0x01e0 +#define PT_F63 0x01f0 +#define PT_F64 0x0200 +#define PT_F65 0x0210 +#define PT_F66 0x0220 +#define PT_F67 0x0230 +#define PT_F68 0x0240 +#define PT_F69 0x0250 +#define PT_F70 0x0260 +#define PT_F71 0x0270 +#define PT_F72 0x0280 +#define PT_F73 0x0290 +#define PT_F74 0x02a0 +#define PT_F75 0x02b0 +#define PT_F76 0x02c0 +#define PT_F77 0x02d0 +#define PT_F78 0x02e0 +#define PT_F79 0x02f0 +#define PT_F80 0x0300 +#define PT_F81 0x0310 +#define PT_F82 0x0320 +#define PT_F83 0x0330 +#define PT_F84 0x0340 +#define PT_F85 0x0350 +#define PT_F86 0x0360 +#define PT_F87 0x0370 +#define PT_F88 0x0380 +#define PT_F89 0x0390 +#define PT_F90 0x03a0 +#define PT_F91 0x03b0 +#define PT_F92 0x03c0 +#define PT_F93 0x03d0 +#define PT_F94 0x03e0 +#define PT_F95 0x03f0 +#define PT_F96 0x0400 +#define PT_F97 0x0410 +#define PT_F98 0x0420 +#define PT_F99 0x0430 +#define PT_F100 0x0440 +#define PT_F101 0x0450 +#define PT_F102 0x0460 +#define PT_F103 0x0470 +#define PT_F104 0x0480 +#define PT_F105 0x0490 +#define PT_F106 0x04a0 +#define PT_F107 0x04b0 +#define PT_F108 0x04c0 +#define PT_F109 0x04d0 +#define PT_F110 0x04e0 +#define PT_F111 0x04f0 +#define PT_F112 0x0500 +#define PT_F113 0x0510 +#define PT_F114 0x0520 +#define PT_F115 0x0530 +#define PT_F116 0x0540 +#define PT_F117 0x0550 +#define PT_F118 0x0560 +#define PT_F119 0x0570 +#define PT_F120 0x0580 +#define PT_F121 0x0590 +#define PT_F122 0x05a0 +#define PT_F123 0x05b0 +#define PT_F124 0x05c0 +#define PT_F125 0x05d0 +#define PT_F126 0x05e0 +#define PT_F127 0x05f0 + +#define PT_NAT_BITS 0x0600 + +#define PT_F2 0x0610 +#define PT_F3 0x0620 +#define PT_F4 0x0630 +#define PT_F5 0x0640 +#define PT_F10 0x0650 +#define PT_F11 0x0660 +#define PT_F12 0x0670 +#define PT_F13 0x0680 +#define PT_F14 0x0690 +#define PT_F15 0x06a0 +#define PT_F16 0x06b0 +#define PT_F17 0x06c0 +#define PT_F18 0x06d0 +#define PT_F19 0x06e0 +#define PT_F20 0x06f0 +#define PT_F21 0x0700 +#define PT_F22 0x0710 +#define PT_F23 0x0720 +#define PT_F24 0x0730 +#define PT_F25 0x0740 +#define PT_F26 0x0750 +#define PT_F27 0x0760 +#define PT_F28 0x0770 +#define PT_F29 0x0780 +#define PT_F30 0x0790 +#define PT_F31 0x07a0 +#define PT_R4 0x07b0 +#define PT_R5 0x07b8 +#define PT_R6 0x07c0 +#define PT_R7 0x07c8 + +#define PT_B1 0x07d8 +#define PT_B2 0x07e0 +#define PT_B3 0x07e8 +#define PT_B4 0x07f0 +#define PT_B5 0x07f8 + +#define PT_AR_EC 0x0800 +#define PT_AR_LC 0x0808 + +#define PT_CR_IPSR 0x0830 +#define PT_CR_IIP 0x0838 +#define PT_CFM 0x0840 +#define PT_AR_UNAT 0x0848 +#define PT_AR_PFS 0x0850 +#define PT_AR_RSC 0x0858 +#define PT_AR_RNAT 0x0860 +#define PT_AR_BSPSTORE 0x0868 +#define PT_PR 0x0870 +#define PT_B6 0x0878 +#define PT_AR_BSP 0x0880 /* note: this points to the *end* of the backing store! */ +#define PT_R1 0x0888 +#define PT_R2 0x0890 +#define PT_R3 0x0898 +#define PT_R12 0x08a0 +#define PT_R13 0x08a8 +#define PT_R14 0x08b0 +#define PT_R15 0x08b8 +#define PT_R8 0x08c0 +#define PT_R9 0x08c8 +#define PT_R10 0x08d0 +#define PT_R11 0x08d8 +#define PT_R16 0x08e0 +#define PT_R17 0x08e8 +#define PT_R18 0x08f0 +#define PT_R19 0x08f8 +#define PT_R20 0x0900 +#define PT_R21 0x0908 +#define PT_R22 0x0910 +#define PT_R23 0x0918 +#define PT_R24 0x0920 +#define PT_R25 0x0928 +#define PT_R26 0x0930 +#define PT_R27 0x0938 +#define PT_R28 0x0940 +#define PT_R29 0x0948 +#define PT_R30 0x0950 +#define PT_R31 0x0958 +#define PT_AR_CCV 0x0960 +#define PT_AR_FPSR 0x0968 +#define PT_B0 0x0970 +#define PT_B7 0x0978 +#define PT_F6 0x0980 +#define PT_F7 0x0990 +#define PT_F8 0x09a0 +#define PT_F9 0x09b0 +#define PT_AR_CSD 0x09c0 +#define PT_AR_SSD 0x09c8 + +#define PT_DBR 0x2000 /* data breakpoint registers */ +#define PT_IBR 0x3000 /* instruction breakpoint registers */ +#define PT_PMD 0x4000 /* performance monitoring counters */ + +#endif /* _ASM_IA64_PTRACE_OFFSETS_H */ diff --git a/arch/ia64/include/uapi/asm/resource.h b/arch/ia64/include/uapi/asm/resource.h new file mode 100644 index 000000000000..d488d2b22ac4 --- /dev/null +++ b/arch/ia64/include/uapi/asm/resource.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_RESOURCE_H +#define _ASM_IA64_RESOURCE_H + +#include +#include + +#endif /* _ASM_IA64_RESOURCE_H */ diff --git a/arch/ia64/include/uapi/asm/rse.h b/arch/ia64/include/uapi/asm/rse.h new file mode 100644 index 000000000000..6d260af571c5 --- /dev/null +++ b/arch/ia64/include/uapi/asm/rse.h @@ -0,0 +1,67 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_RSE_H +#define _ASM_IA64_RSE_H + +/* + * Copyright (C) 1998, 1999 Hewlett-Packard Co + * Copyright (C) 1998, 1999 David Mosberger-Tang + * + * Register stack engine related helper functions. This file may be + * used in applications, so be careful about the name-space and give + * some consideration to non-GNU C compilers (though __inline__ is + * fine). + */ + +static __inline__ unsigned long +ia64_rse_slot_num (unsigned long *addr) +{ + return (((unsigned long) addr) >> 3) & 0x3f; +} + +/* + * Return TRUE if ADDR is the address of an RNAT slot. + */ +static __inline__ unsigned long +ia64_rse_is_rnat_slot (unsigned long *addr) +{ + return ia64_rse_slot_num(addr) == 0x3f; +} + +/* + * Returns the address of the RNAT slot that covers the slot at + * address SLOT_ADDR. + */ +static __inline__ unsigned long * +ia64_rse_rnat_addr (unsigned long *slot_addr) +{ + return (unsigned long *) ((unsigned long) slot_addr | (0x3f << 3)); +} + +/* + * Calculate the number of registers in the dirty partition starting at BSPSTORE and + * ending at BSP. This isn't simply (BSP-BSPSTORE)/8 because every 64th slot stores + * ar.rnat. + */ +static __inline__ unsigned long +ia64_rse_num_regs (unsigned long *bspstore, unsigned long *bsp) +{ + unsigned long slots = (bsp - bspstore); + + return slots - (ia64_rse_slot_num(bspstore) + slots)/0x40; +} + +/* + * The inverse of the above: given bspstore and the number of + * registers, calculate ar.bsp. + */ +static __inline__ unsigned long * +ia64_rse_skip_regs (unsigned long *addr, long num_regs) +{ + long delta = ia64_rse_slot_num(addr) + num_regs; + + if (num_regs < 0) + delta -= 0x3e; + return addr + num_regs + delta/0x3f; +} + +#endif /* _ASM_IA64_RSE_H */ diff --git a/arch/ia64/include/uapi/asm/setup.h b/arch/ia64/include/uapi/asm/setup.h new file mode 100644 index 000000000000..8d13ce8fb03a --- /dev/null +++ b/arch/ia64/include/uapi/asm/setup.h @@ -0,0 +1,25 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __IA64_SETUP_H +#define __IA64_SETUP_H + +#define COMMAND_LINE_SIZE 2048 + +extern struct ia64_boot_param { + __u64 command_line; /* physical address of command line arguments */ + __u64 efi_systab; /* physical address of EFI system table */ + __u64 efi_memmap; /* physical address of EFI memory map */ + __u64 efi_memmap_size; /* size of EFI memory map */ + __u64 efi_memdesc_size; /* size of an EFI memory map descriptor */ + __u32 efi_memdesc_version; /* memory descriptor version */ + struct { + __u16 num_cols; /* number of columns on console output device */ + __u16 num_rows; /* number of rows on console output device */ + __u16 orig_x; /* cursor's x position */ + __u16 orig_y; /* cursor's y position */ + } console_info; + __u64 fpswa; /* physical address of the fpswa interface */ + __u64 initrd_start; + __u64 initrd_size; +} *ia64_boot_param; + +#endif diff --git a/arch/ia64/include/uapi/asm/sigcontext.h b/arch/ia64/include/uapi/asm/sigcontext.h new file mode 100644 index 000000000000..1bb6f0f2bd73 --- /dev/null +++ b/arch/ia64/include/uapi/asm/sigcontext.h @@ -0,0 +1,71 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_SIGCONTEXT_H +#define _ASM_IA64_SIGCONTEXT_H + +/* + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Copyright (C) 1998, 1999, 2001 David Mosberger-Tang + */ + +#include + +#define IA64_SC_FLAG_ONSTACK_BIT 0 /* is handler running on signal stack? */ +#define IA64_SC_FLAG_IN_SYSCALL_BIT 1 /* did signal interrupt a syscall? */ +#define IA64_SC_FLAG_FPH_VALID_BIT 2 /* is state in f[32]-f[127] valid? */ + +#define IA64_SC_FLAG_ONSTACK (1 << IA64_SC_FLAG_ONSTACK_BIT) +#define IA64_SC_FLAG_IN_SYSCALL (1 << IA64_SC_FLAG_IN_SYSCALL_BIT) +#define IA64_SC_FLAG_FPH_VALID (1 << IA64_SC_FLAG_FPH_VALID_BIT) + +# ifndef __ASSEMBLY__ + +/* + * Note on handling of register backing store: sc_ar_bsp contains the address that would + * be found in ar.bsp after executing a "cover" instruction the context in which the + * signal was raised. If signal delivery required switching to an alternate signal stack + * (sc_rbs_base is not NULL), the "dirty" partition (as it would exist after executing the + * imaginary "cover" instruction) is backed by the *alternate* signal stack, not the + * original one. In this case, sc_rbs_base contains the base address of the new register + * backing store. The number of registers in the dirty partition can be calculated as: + * + * ndirty = ia64_rse_num_regs(sc_rbs_base, sc_rbs_base + (sc_loadrs >> 16)) + * + */ + +struct sigcontext { + unsigned long sc_flags; /* see manifest constants above */ + unsigned long sc_nat; /* bit i == 1 iff scratch reg gr[i] is a NaT */ + stack_t sc_stack; /* previously active stack */ + + unsigned long sc_ip; /* instruction pointer */ + unsigned long sc_cfm; /* current frame marker */ + unsigned long sc_um; /* user mask bits */ + unsigned long sc_ar_rsc; /* register stack configuration register */ + unsigned long sc_ar_bsp; /* backing store pointer */ + unsigned long sc_ar_rnat; /* RSE NaT collection register */ + unsigned long sc_ar_ccv; /* compare and exchange compare value register */ + unsigned long sc_ar_unat; /* ar.unat of interrupted context */ + unsigned long sc_ar_fpsr; /* floating-point status register */ + unsigned long sc_ar_pfs; /* previous function state */ + unsigned long sc_ar_lc; /* loop count register */ + unsigned long sc_pr; /* predicate registers */ + unsigned long sc_br[8]; /* branch registers */ + /* Note: sc_gr[0] is used as the "uc_link" member of ucontext_t */ + unsigned long sc_gr[32]; /* general registers (static partition) */ + struct ia64_fpreg sc_fr[128]; /* floating-point registers */ + + unsigned long sc_rbs_base; /* NULL or new base of sighandler's rbs */ + unsigned long sc_loadrs; /* see description above */ + + unsigned long sc_ar25; /* cmp8xchg16 uses this */ + unsigned long sc_ar26; /* rsvd for scratch use */ + unsigned long sc_rsvd[12]; /* reserved for future use */ + /* + * The mask must come last so we can increase _NSIG_WORDS + * without breaking binary compatibility. + */ + sigset_t sc_mask; /* signal mask to restore after handler returns */ +}; + +# endif /* __ASSEMBLY__ */ +#endif /* _ASM_IA64_SIGCONTEXT_H */ diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h new file mode 100644 index 000000000000..796af1ccaa7e --- /dev/null +++ b/arch/ia64/include/uapi/asm/siginfo.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Based on . + * + * Modified 1998-2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_SIGINFO_H +#define _UAPI_ASM_IA64_SIGINFO_H + + +#include + +#define si_imm _sifields._sigfault._imm /* as per UNIX SysV ABI spec */ +#define si_flags _sifields._sigfault._flags +/* + * si_isr is valid for SIGILL, SIGFPE, SIGSEGV, SIGBUS, and SIGTRAP provided that + * si_code is non-zero and __ISR_VALID is set in si_flags. + */ +#define si_isr _sifields._sigfault._isr + +/* + * Flag values for si_flags: + */ +#define __ISR_VALID_BIT 0 +#define __ISR_VALID (1 << __ISR_VALID_BIT) + +#endif /* _UAPI_ASM_IA64_SIGINFO_H */ diff --git a/arch/ia64/include/uapi/asm/signal.h b/arch/ia64/include/uapi/asm/signal.h new file mode 100644 index 000000000000..63d574e802a2 --- /dev/null +++ b/arch/ia64/include/uapi/asm/signal.h @@ -0,0 +1,98 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Modified 1998-2001, 2003 + * David Mosberger-Tang , Hewlett-Packard Co + * + * Unfortunately, this file is being included by bits/signal.h in + * glibc-2.x. Hence the #ifdef __KERNEL__ ugliness. + */ +#ifndef _UAPI_ASM_IA64_SIGNAL_H +#define _UAPI_ASM_IA64_SIGNAL_H + + +#define SIGHUP 1 +#define SIGINT 2 +#define SIGQUIT 3 +#define SIGILL 4 +#define SIGTRAP 5 +#define SIGABRT 6 +#define SIGIOT 6 +#define SIGBUS 7 +#define SIGFPE 8 +#define SIGKILL 9 +#define SIGUSR1 10 +#define SIGSEGV 11 +#define SIGUSR2 12 +#define SIGPIPE 13 +#define SIGALRM 14 +#define SIGTERM 15 +#define SIGSTKFLT 16 +#define SIGCHLD 17 +#define SIGCONT 18 +#define SIGSTOP 19 +#define SIGTSTP 20 +#define SIGTTIN 21 +#define SIGTTOU 22 +#define SIGURG 23 +#define SIGXCPU 24 +#define SIGXFSZ 25 +#define SIGVTALRM 26 +#define SIGPROF 27 +#define SIGWINCH 28 +#define SIGIO 29 +#define SIGPOLL SIGIO +/* +#define SIGLOST 29 +*/ +#define SIGPWR 30 +#define SIGSYS 31 +/* signal 31 is no longer "unused", but the SIGUNUSED macro remains for backwards compatibility */ +#define SIGUNUSED 31 + +/* These should not be considered constants from userland. */ +#define SIGRTMIN 32 +#define SIGRTMAX _NSIG + +#define SA_RESTORER 0x04000000 + +/* + * The minimum stack size needs to be fairly large because we want to + * be sure that an app compiled for today's CPUs will continue to run + * on all future CPU models. The CPU model matters because the signal + * frame needs to have space for the complete machine state, including + * all physical stacked registers. The number of physical stacked + * registers is CPU model dependent, but given that the width of + * ar.rsc.loadrs is 14 bits, we can assume that they'll never take up + * more than 16KB of space. + */ +#if 1 + /* + * This is a stupid typo: the value was _meant_ to be 131072 (0x20000), but I typed it + * in wrong. ;-( To preserve backwards compatibility, we leave the kernel at the + * incorrect value and fix libc only. + */ +# define MINSIGSTKSZ 131027 /* min. stack size for sigaltstack() */ +#else +# define MINSIGSTKSZ 131072 /* min. stack size for sigaltstack() */ +#endif +#define SIGSTKSZ 262144 /* default stack size for sigaltstack() */ + + +#include + +# ifndef __ASSEMBLY__ + +# include + +/* Avoid too many header ordering problems. */ +struct siginfo; + +typedef struct sigaltstack { + void __user *ss_sp; + int ss_flags; + __kernel_size_t ss_size; +} stack_t; + + +# endif /* !__ASSEMBLY__ */ +#endif /* _UAPI_ASM_IA64_SIGNAL_H */ diff --git a/arch/ia64/include/uapi/asm/stat.h b/arch/ia64/include/uapi/asm/stat.h new file mode 100644 index 000000000000..3265ed5aac0f --- /dev/null +++ b/arch/ia64/include/uapi/asm/stat.h @@ -0,0 +1,52 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_STAT_H +#define _ASM_IA64_STAT_H + +/* + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +struct stat { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long st_blksize; + long st_blocks; + unsigned long __unused[3]; +}; + +#define STAT_HAVE_NSEC 1 + +struct ia64_oldstat { + unsigned int st_dev; + unsigned int st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + unsigned int __pad1; + unsigned long st_size; + unsigned long st_atime; + unsigned long st_mtime; + unsigned long st_ctime; + unsigned int st_blksize; + int st_blocks; + unsigned int __unused1; + unsigned int __unused2; +}; + +#endif /* _ASM_IA64_STAT_H */ diff --git a/arch/ia64/include/uapi/asm/statfs.h b/arch/ia64/include/uapi/asm/statfs.h new file mode 100644 index 000000000000..de3bae4f137d --- /dev/null +++ b/arch/ia64/include/uapi/asm/statfs.h @@ -0,0 +1,21 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_STATFS_H +#define _ASM_IA64_STATFS_H + +/* + * Based on . + * + * Modified 1998, 1999, 2003 + * David Mosberger-Tang , Hewlett-Packard Co + */ + +/* + * We need compat_statfs64 to be packed, because the i386 ABI won't + * add padding at the end to bring it to a multiple of 8 bytes, but + * the IA64 ABI will. + */ +#define ARCH_PACK_COMPAT_STATFS64 __attribute__((packed,aligned(4))) + +#include + +#endif /* _ASM_IA64_STATFS_H */ diff --git a/arch/ia64/include/uapi/asm/swab.h b/arch/ia64/include/uapi/asm/swab.h new file mode 100644 index 000000000000..79f3fef1a05e --- /dev/null +++ b/arch/ia64/include/uapi/asm/swab.h @@ -0,0 +1,35 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_SWAB_H +#define _ASM_IA64_SWAB_H + +/* + * Modified 1998, 1999 + * David Mosberger-Tang , Hewlett-Packard Co. + */ + +#include +#include +#include + +static __inline__ __attribute_const__ __u64 __arch_swab64(__u64 x) +{ + __u64 result; + + result = ia64_mux1(x, ia64_mux1_rev); + return result; +} +#define __arch_swab64 __arch_swab64 + +static __inline__ __attribute_const__ __u32 __arch_swab32(__u32 x) +{ + return __arch_swab64(x) >> 32; +} +#define __arch_swab32 __arch_swab32 + +static __inline__ __attribute_const__ __u16 __arch_swab16(__u16 x) +{ + return __arch_swab64(x) >> 48; +} +#define __arch_swab16 __arch_swab16 + +#endif /* _ASM_IA64_SWAB_H */ diff --git a/arch/ia64/include/uapi/asm/types.h b/arch/ia64/include/uapi/asm/types.h new file mode 100644 index 000000000000..2000de474be6 --- /dev/null +++ b/arch/ia64/include/uapi/asm/types.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is never included by application software unless explicitly + * requested (e.g., via linux/types.h) in which case the application is + * Linux specific so (user-) name space pollution is not a major issue. + * However, for interoperability, libraries still need to be careful to + * avoid naming clashes. + * + * Based on . + * + * Modified 1998-2000, 2002 + * David Mosberger-Tang , Hewlett-Packard Co + */ +#ifndef _UAPI_ASM_IA64_TYPES_H +#define _UAPI_ASM_IA64_TYPES_H + + +#ifndef __KERNEL__ +#include +#endif + +#ifdef __ASSEMBLY__ +# define __IA64_UL(x) (x) +# define __IA64_UL_CONST(x) x + +#else +# define __IA64_UL(x) ((unsigned long)(x)) +# define __IA64_UL_CONST(x) x##UL + +#endif /* !__ASSEMBLY__ */ + +#endif /* _UAPI_ASM_IA64_TYPES_H */ diff --git a/arch/ia64/include/uapi/asm/ucontext.h b/arch/ia64/include/uapi/asm/ucontext.h new file mode 100644 index 000000000000..46f51e535e04 --- /dev/null +++ b/arch/ia64/include/uapi/asm/ucontext.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_IA64_UCONTEXT_H +#define _ASM_IA64_UCONTEXT_H + +struct ucontext { + struct sigcontext uc_mcontext; +}; + +#define uc_link uc_mcontext.sc_gr[0] /* wrong type; nobody cares */ +#define uc_sigmask uc_mcontext.sc_sigmask +#define uc_stack uc_mcontext.sc_stack + +#endif /* _ASM_IA64_UCONTEXT_H */ diff --git a/arch/ia64/include/uapi/asm/unistd.h b/arch/ia64/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..013e0bcacc39 --- /dev/null +++ b/arch/ia64/include/uapi/asm/unistd.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * IA-64 Linux syscall numbers and inline-functions. + * + * Copyright (C) 1998-2005 Hewlett-Packard Co + * David Mosberger-Tang + */ +#ifndef _UAPI_ASM_IA64_UNISTD_H +#define _UAPI_ASM_IA64_UNISTD_H + + +#include + +#define __BREAK_SYSCALL __IA64_BREAK_SYSCALL + +#define __NR_Linux 1024 + +#define __NR_umount __NR_umount2 + +#include + +#endif /* _UAPI_ASM_IA64_UNISTD_H */ diff --git a/arch/ia64/include/uapi/asm/ustack.h b/arch/ia64/include/uapi/asm/ustack.h new file mode 100644 index 000000000000..703cc5f546ff --- /dev/null +++ b/arch/ia64/include/uapi/asm/ustack.h @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_ASM_IA64_USTACK_H +#define _UAPI_ASM_IA64_USTACK_H + +/* + * Constants for the user stack size + */ + + +/* Make a default stack size of 2GiB */ +#define DEFAULT_USER_STACK_SIZE (1UL << 31) + +#endif /* _UAPI_ASM_IA64_USTACK_H */ diff --git a/arch/ia64/install.sh b/arch/ia64/install.sh new file mode 100755 index 000000000000..2d4b66a9f362 --- /dev/null +++ b/arch/ia64/install.sh @@ -0,0 +1,30 @@ +#!/bin/sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# +# "make install" script for ia64 architecture +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) + +if [ -f $4/vmlinuz ]; then + mv $4/vmlinuz $4/vmlinuz.old +fi + +if [ -f $4/System.map ]; then + mv $4/System.map $4/System.old +fi + +cat $2 > $4/vmlinuz +cp $3 $4/System.map + +test -x /usr/sbin/elilo && /usr/sbin/elilo diff --git a/arch/ia64/kernel/.gitignore b/arch/ia64/kernel/.gitignore new file mode 100644 index 000000000000..0374827206e7 --- /dev/null +++ b/arch/ia64/kernel/.gitignore @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: GPL-2.0-only +gate.lds +vmlinux.lds diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile new file mode 100644 index 000000000000..d7e1cabee2ec --- /dev/null +++ b/arch/ia64/kernel/Makefile @@ -0,0 +1,46 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the linux kernel. +# + +ifdef CONFIG_DYNAMIC_FTRACE +CFLAGS_REMOVE_ftrace.o = -pg +endif + +extra-y := vmlinux.lds + +obj-y := head.o entry.o efi.o efi_stub.o gate-data.o fsys.o irq.o irq_ia64.o \ + irq_lsapic.o ivt.o pal.o patch.o process.o ptrace.o sal.o \ + salinfo.o setup.o signal.o sys_ia64.o time.o traps.o unaligned.o \ + unwind.o mca.o mca_asm.o topology.o dma-mapping.o iosapic.o acpi.o \ + acpi-ext.o + +obj-$(CONFIG_IA64_BRL_EMU) += brl_emu.o + +obj-$(CONFIG_IA64_PALINFO) += palinfo.o +obj-$(CONFIG_MODULES) += module.o +obj-$(CONFIG_SMP) += smp.o smpboot.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_IA64_CYCLONE) += cyclone.o +obj-$(CONFIG_IA64_MCA_RECOVERY) += mca_recovery.o +obj-$(CONFIG_KPROBES) += kprobes.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o crash.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_IA64_UNCACHED_ALLOCATOR) += uncached.o +obj-$(CONFIG_AUDIT) += audit.o +obj-y += msi_ia64.o +mca_recovery-y += mca_drv.o mca_drv_asm.o +obj-$(CONFIG_IA64_MC_ERR_INJECT)+= err_inject.o +obj-$(CONFIG_STACKTRACE) += stacktrace.o + +obj-$(CONFIG_IA64_ESI) += esi.o esi_stub.o # must be in kernel proper +obj-$(CONFIG_INTEL_IOMMU) += pci-dma.o + +obj-$(CONFIG_ELF_CORE) += elfcore.o + +# fp_emulate() expects f2-f5,f16-f31 to contain the user-level state. +CFLAGS_traps.o += -mfixed-range=f2-f5,f16-f31 + +# The gate DSO image is built using a special linker script. +include $(srctree)/$(src)/Makefile.gate diff --git a/arch/ia64/kernel/Makefile.gate b/arch/ia64/kernel/Makefile.gate new file mode 100644 index 000000000000..846867bff6d6 --- /dev/null +++ b/arch/ia64/kernel/Makefile.gate @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# The gate DSO image is built using a special linker script. + +targets += gate.so gate.lds gate.o gate-dummy.o + +obj-y += gate-syms.o + +CPPFLAGS_gate.lds := -P -C -U$(ARCH) + +quiet_cmd_gate = GATE $@ + cmd_gate = $(CC) -nostdlib $(GATECFLAGS_$(@F)) -Wl,-T,$(filter-out FORCE,$^) -o $@ + +GATECFLAGS_gate.so = -shared -s -Wl,-soname=linux-gate.so.1 \ + -Wl,--hash-style=sysv +$(obj)/gate.so: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +GATECFLAGS_gate-dummy.o = -r +$(obj)/gate-dummy.o: $(obj)/gate.lds $(obj)/gate.o FORCE + $(call if_changed,gate) + +LDFLAGS_gate-syms.o := -r -R +$(obj)/gate-syms.o: $(obj)/gate-dummy.o FORCE + $(call if_changed,ld) + +# gate-data.o contains the gate DSO image as data in section .data..gate. +# We must build gate.so before we can assemble it. +# Note: kbuild does not track this dependency due to usage of .incbin +$(obj)/gate-data.o: $(obj)/gate.so diff --git a/arch/ia64/kernel/acpi-ext.c b/arch/ia64/kernel/acpi-ext.c new file mode 100644 index 000000000000..42cd21480833 --- /dev/null +++ b/arch/ia64/kernel/acpi-ext.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (c) Copyright 2003, 2006 Hewlett-Packard Development Company, L.P. + * Alex Williamson + * Bjorn Helgaas + */ + +#include +#include +#include +#include + +#include + +/* + * Device CSRs that do not appear in PCI config space should be described + * via ACPI. This would normally be done with Address Space Descriptors + * marked as "consumer-only," but old versions of Windows and Linux ignore + * the producer/consumer flag, so HP invented a vendor-defined resource to + * describe the location and size of CSR space. + */ + +struct acpi_vendor_uuid hp_ccsr_uuid = { + .subtype = 2, + .data = { 0xf9, 0xad, 0xe9, 0x69, 0x4f, 0x92, 0x5f, 0xab, 0xf6, 0x4a, + 0x24, 0xd2, 0x01, 0x37, 0x0e, 0xad }, +}; + +static acpi_status hp_ccsr_locate(acpi_handle obj, u64 *base, u64 *length) +{ + acpi_status status; + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + struct acpi_resource *resource; + struct acpi_resource_vendor_typed *vendor; + + status = acpi_get_vendor_resource(obj, METHOD_NAME__CRS, &hp_ccsr_uuid, + &buffer); + + resource = buffer.pointer; + vendor = &resource->data.vendor_typed; + + if (ACPI_FAILURE(status) || vendor->byte_length < 16) { + status = AE_NOT_FOUND; + goto exit; + } + + memcpy(base, vendor->byte_data, sizeof(*base)); + memcpy(length, vendor->byte_data + 8, sizeof(*length)); + + exit: + kfree(buffer.pointer); + return status; +} + +struct csr_space { + u64 base; + u64 length; +}; + +static acpi_status find_csr_space(struct acpi_resource *resource, void *data) +{ + struct csr_space *space = data; + struct acpi_resource_address64 addr; + acpi_status status; + + status = acpi_resource_to_address64(resource, &addr); + if (ACPI_SUCCESS(status) && + addr.resource_type == ACPI_MEMORY_RANGE && + addr.address.address_length && + addr.producer_consumer == ACPI_CONSUMER) { + space->base = addr.address.minimum; + space->length = addr.address.address_length; + return AE_CTRL_TERMINATE; + } + return AE_OK; /* keep looking */ +} + +static acpi_status hp_crs_locate(acpi_handle obj, u64 *base, u64 *length) +{ + struct csr_space space = { 0, 0 }; + + acpi_walk_resources(obj, METHOD_NAME__CRS, find_csr_space, &space); + if (!space.length) + return AE_NOT_FOUND; + + *base = space.base; + *length = space.length; + return AE_OK; +} + +acpi_status hp_acpi_csr_space(acpi_handle obj, u64 *csr_base, u64 *csr_length) +{ + acpi_status status; + + status = hp_ccsr_locate(obj, csr_base, csr_length); + if (ACPI_SUCCESS(status)) + return status; + + return hp_crs_locate(obj, csr_base, csr_length); +} +EXPORT_SYMBOL(hp_acpi_csr_space); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c new file mode 100644 index 000000000000..41e8fe55cd98 --- /dev/null +++ b/arch/ia64/kernel/acpi.c @@ -0,0 +1,913 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * acpi.c - Architecture-Specific Low-Level ACPI Support + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang + * Copyright (C) 2000 Intel Corp. + * Copyright (C) 2000,2001 J.I. Lee + * Copyright (C) 2001 Paul Diefenbaugh + * Copyright (C) 2001 Jenna Hall + * Copyright (C) 2001 Takayoshi Kochi + * Copyright (C) 2002 Erich Focht + * Copyright (C) 2004 Ashok Raj + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define PREFIX "ACPI: " + +int acpi_lapic; +unsigned int acpi_cpei_override; +unsigned int acpi_cpei_phys_cpuid; + +#define ACPI_MAX_PLATFORM_INTERRUPTS 256 + +/* Array to record platform interrupt vectors for generic interrupt routing. */ +int platform_intr_list[ACPI_MAX_PLATFORM_INTERRUPTS] = { + [0 ... ACPI_MAX_PLATFORM_INTERRUPTS - 1] = -1 +}; + +enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_IOSAPIC; + +/* + * Interrupt routing API for device drivers. Provides interrupt vector for + * a generic platform event. Currently only CPEI is implemented. + */ +int acpi_request_vector(u32 int_type) +{ + int vector = -1; + + if (int_type < ACPI_MAX_PLATFORM_INTERRUPTS) { + /* corrected platform error interrupt */ + vector = platform_intr_list[int_type]; + } else + printk(KERN_ERR + "acpi_request_vector(): invalid interrupt type\n"); + return vector; +} + +void __init __iomem *__acpi_map_table(unsigned long phys, unsigned long size) +{ + return __va(phys); +} + +void __init __acpi_unmap_table(void __iomem *map, unsigned long size) +{ +} + +/* -------------------------------------------------------------------------- + Boot-time Table Parsing + -------------------------------------------------------------------------- */ + +static int available_cpus __initdata; +struct acpi_table_madt *acpi_madt __initdata; +static u8 has_8259; + +static int __init +acpi_parse_lapic_addr_ovr(union acpi_subtable_headers * header, + const unsigned long end) +{ + struct acpi_madt_local_apic_override *lapic; + + lapic = (struct acpi_madt_local_apic_override *)header; + + if (BAD_MADT_ENTRY(lapic, end)) + return -EINVAL; + + if (lapic->address) { + iounmap(ipi_base_addr); + ipi_base_addr = ioremap(lapic->address, 0); + } + return 0; +} + +static int __init +acpi_parse_lsapic(union acpi_subtable_headers *header, const unsigned long end) +{ + struct acpi_madt_local_sapic *lsapic; + + lsapic = (struct acpi_madt_local_sapic *)header; + + /*Skip BAD_MADT_ENTRY check, as lsapic size could vary */ + + if (lsapic->lapic_flags & ACPI_MADT_ENABLED) { +#ifdef CONFIG_SMP + smp_boot_data.cpu_phys_id[available_cpus] = + (lsapic->id << 8) | lsapic->eid; +#endif + ++available_cpus; + } + + total_cpus++; + return 0; +} + +static int __init +acpi_parse_lapic_nmi(union acpi_subtable_headers * header, const unsigned long end) +{ + struct acpi_madt_local_apic_nmi *lacpi_nmi; + + lacpi_nmi = (struct acpi_madt_local_apic_nmi *)header; + + if (BAD_MADT_ENTRY(lacpi_nmi, end)) + return -EINVAL; + + /* TBD: Support lapic_nmi entries */ + return 0; +} + +static int __init +acpi_parse_iosapic(union acpi_subtable_headers * header, const unsigned long end) +{ + struct acpi_madt_io_sapic *iosapic; + + iosapic = (struct acpi_madt_io_sapic *)header; + + if (BAD_MADT_ENTRY(iosapic, end)) + return -EINVAL; + + return iosapic_init(iosapic->address, iosapic->global_irq_base); +} + +static unsigned int __initdata acpi_madt_rev; + +static int __init +acpi_parse_plat_int_src(union acpi_subtable_headers * header, + const unsigned long end) +{ + struct acpi_madt_interrupt_source *plintsrc; + int vector; + + plintsrc = (struct acpi_madt_interrupt_source *)header; + + if (BAD_MADT_ENTRY(plintsrc, end)) + return -EINVAL; + + /* + * Get vector assignment for this interrupt, set attributes, + * and program the IOSAPIC routing table. + */ + vector = iosapic_register_platform_intr(plintsrc->type, + plintsrc->global_irq, + plintsrc->io_sapic_vector, + plintsrc->eid, + plintsrc->id, + ((plintsrc->inti_flags & ACPI_MADT_POLARITY_MASK) == + ACPI_MADT_POLARITY_ACTIVE_HIGH) ? + IOSAPIC_POL_HIGH : IOSAPIC_POL_LOW, + ((plintsrc->inti_flags & ACPI_MADT_TRIGGER_MASK) == + ACPI_MADT_TRIGGER_EDGE) ? + IOSAPIC_EDGE : IOSAPIC_LEVEL); + + platform_intr_list[plintsrc->type] = vector; + if (acpi_madt_rev > 1) { + acpi_cpei_override = plintsrc->flags & ACPI_MADT_CPEI_OVERRIDE; + } + + /* + * Save the physical id, so we can check when its being removed + */ + acpi_cpei_phys_cpuid = ((plintsrc->id << 8) | (plintsrc->eid)) & 0xffff; + + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +unsigned int can_cpei_retarget(void) +{ + extern int cpe_vector; + extern unsigned int force_cpei_retarget; + + /* + * Only if CPEI is supported and the override flag + * is present, otherwise return that its re-targettable + * if we are in polling mode. + */ + if (cpe_vector > 0) { + if (acpi_cpei_override || force_cpei_retarget) + return 1; + else + return 0; + } + return 1; +} + +unsigned int is_cpu_cpei_target(unsigned int cpu) +{ + unsigned int logical_id; + + logical_id = cpu_logical_id(acpi_cpei_phys_cpuid); + + if (logical_id == cpu) + return 1; + else + return 0; +} + +void set_cpei_target_cpu(unsigned int cpu) +{ + acpi_cpei_phys_cpuid = cpu_physical_id(cpu); +} +#endif + +unsigned int get_cpei_target_cpu(void) +{ + return acpi_cpei_phys_cpuid; +} + +static int __init +acpi_parse_int_src_ovr(union acpi_subtable_headers * header, + const unsigned long end) +{ + struct acpi_madt_interrupt_override *p; + + p = (struct acpi_madt_interrupt_override *)header; + + if (BAD_MADT_ENTRY(p, end)) + return -EINVAL; + + iosapic_override_isa_irq(p->source_irq, p->global_irq, + ((p->inti_flags & ACPI_MADT_POLARITY_MASK) == + ACPI_MADT_POLARITY_ACTIVE_LOW) ? + IOSAPIC_POL_LOW : IOSAPIC_POL_HIGH, + ((p->inti_flags & ACPI_MADT_TRIGGER_MASK) == + ACPI_MADT_TRIGGER_LEVEL) ? + IOSAPIC_LEVEL : IOSAPIC_EDGE); + return 0; +} + +static int __init +acpi_parse_nmi_src(union acpi_subtable_headers * header, const unsigned long end) +{ + struct acpi_madt_nmi_source *nmi_src; + + nmi_src = (struct acpi_madt_nmi_source *)header; + + if (BAD_MADT_ENTRY(nmi_src, end)) + return -EINVAL; + + /* TBD: Support nimsrc entries */ + return 0; +} + +static void __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) +{ + if (!strncmp(oem_id, "IBM", 3) && (!strncmp(oem_table_id, "SERMOW", 6))) { + + /* + * Unfortunately ITC_DRIFT is not yet part of the + * official SAL spec, so the ITC_DRIFT bit is not + * set by the BIOS on this hardware. + */ + sal_platform_features |= IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT; + + cyclone_setup(); + } +} + +static int __init acpi_parse_madt(struct acpi_table_header *table) +{ + acpi_madt = (struct acpi_table_madt *)table; + + acpi_madt_rev = acpi_madt->header.revision; + + /* remember the value for reference after free_initmem() */ +#ifdef CONFIG_ITANIUM + has_8259 = 1; /* Firmware on old Itanium systems is broken */ +#else + has_8259 = acpi_madt->flags & ACPI_MADT_PCAT_COMPAT; +#endif + iosapic_system_init(has_8259); + + /* Get base address of IPI Message Block */ + + if (acpi_madt->address) + ipi_base_addr = ioremap(acpi_madt->address, 0); + + printk(KERN_INFO PREFIX "Local APIC address %p\n", ipi_base_addr); + + acpi_madt_oem_check(acpi_madt->header.oem_id, + acpi_madt->header.oem_table_id); + + return 0; +} + +#ifdef CONFIG_ACPI_NUMA + +#undef SLIT_DEBUG + +#define PXM_FLAG_LEN ((MAX_PXM_DOMAINS + 1)/32) + +static int __initdata srat_num_cpus; /* number of cpus */ +static u32 pxm_flag[PXM_FLAG_LEN]; +#define pxm_bit_set(bit) (set_bit(bit,(void *)pxm_flag)) +#define pxm_bit_test(bit) (test_bit(bit,(void *)pxm_flag)) +static struct acpi_table_slit __initdata *slit_table; +cpumask_t early_cpu_possible_map = CPU_MASK_NONE; + +static int __init +get_processor_proximity_domain(struct acpi_srat_cpu_affinity *pa) +{ + int pxm; + + pxm = pa->proximity_domain_lo; + if (acpi_srat_revision >= 2) + pxm += pa->proximity_domain_hi[0] << 8; + return pxm; +} + +static int __init +get_memory_proximity_domain(struct acpi_srat_mem_affinity *ma) +{ + int pxm; + + pxm = ma->proximity_domain; + if (acpi_srat_revision <= 1) + pxm &= 0xff; + + return pxm; +} + +/* + * ACPI 2.0 SLIT (System Locality Information Table) + * http://devresource.hp.com/devresource/Docs/TechPapers/IA64/slit.pdf + */ +void __init acpi_numa_slit_init(struct acpi_table_slit *slit) +{ + u32 len; + + len = sizeof(struct acpi_table_header) + 8 + + slit->locality_count * slit->locality_count; + if (slit->header.length != len) { + printk(KERN_ERR + "ACPI 2.0 SLIT: size mismatch: %d expected, %d actual\n", + len, slit->header.length); + return; + } + slit_table = slit; +} + +void __init +acpi_numa_processor_affinity_init(struct acpi_srat_cpu_affinity *pa) +{ + int pxm; + + if (!(pa->flags & ACPI_SRAT_CPU_ENABLED)) + return; + + if (srat_num_cpus >= ARRAY_SIZE(node_cpuid)) { + printk_once(KERN_WARNING + "node_cpuid[%ld] is too small, may not be able to use all cpus\n", + ARRAY_SIZE(node_cpuid)); + return; + } + pxm = get_processor_proximity_domain(pa); + + /* record this node in proximity bitmap */ + pxm_bit_set(pxm); + + node_cpuid[srat_num_cpus].phys_id = + (pa->apic_id << 8) | (pa->local_sapic_eid); + /* nid should be overridden as logical node id later */ + node_cpuid[srat_num_cpus].nid = pxm; + cpumask_set_cpu(srat_num_cpus, &early_cpu_possible_map); + srat_num_cpus++; +} + +int __init +acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) +{ + unsigned long paddr, size; + int pxm; + struct node_memblk_s *p, *q, *pend; + + pxm = get_memory_proximity_domain(ma); + + /* fill node memory chunk structure */ + paddr = ma->base_address; + size = ma->length; + + /* Ignore disabled entries */ + if (!(ma->flags & ACPI_SRAT_MEM_ENABLED)) + return -1; + + if (num_node_memblks >= NR_NODE_MEMBLKS) { + pr_err("NUMA: too many memblk ranges\n"); + return -EINVAL; + } + + /* record this node in proximity bitmap */ + pxm_bit_set(pxm); + + /* Insertion sort based on base address */ + pend = &node_memblk[num_node_memblks]; + for (p = &node_memblk[0]; p < pend; p++) { + if (paddr < p->start_paddr) + break; + } + if (p < pend) { + for (q = pend - 1; q >= p; q--) + *(q + 1) = *q; + } + p->start_paddr = paddr; + p->size = size; + p->nid = pxm; + num_node_memblks++; + return 0; +} + +void __init acpi_numa_fixup(void) +{ + int i, j, node_from, node_to; + + /* If there's no SRAT, fix the phys_id and mark node 0 online */ + if (srat_num_cpus == 0) { + node_set_online(0); + node_cpuid[0].phys_id = hard_smp_processor_id(); + slit_distance(0, 0) = LOCAL_DISTANCE; + goto out; + } + + /* + * MCD - This can probably be dropped now. No need for pxm ID to node ID + * mapping with sparse node numbering iff MAX_PXM_DOMAINS <= MAX_NUMNODES. + */ + nodes_clear(node_online_map); + for (i = 0; i < MAX_PXM_DOMAINS; i++) { + if (pxm_bit_test(i)) { + int nid = acpi_map_pxm_to_node(i); + node_set_online(nid); + } + } + + /* set logical node id in memory chunk structure */ + for (i = 0; i < num_node_memblks; i++) + node_memblk[i].nid = pxm_to_node(node_memblk[i].nid); + + /* assign memory bank numbers for each chunk on each node */ + for_each_online_node(i) { + int bank; + + bank = 0; + for (j = 0; j < num_node_memblks; j++) + if (node_memblk[j].nid == i) + node_memblk[j].bank = bank++; + } + + /* set logical node id in cpu structure */ + for_each_possible_early_cpu(i) + node_cpuid[i].nid = pxm_to_node(node_cpuid[i].nid); + + printk(KERN_INFO "Number of logical nodes in system = %d\n", + num_online_nodes()); + printk(KERN_INFO "Number of memory chunks in system = %d\n", + num_node_memblks); + + if (!slit_table) { + for (i = 0; i < MAX_NUMNODES; i++) + for (j = 0; j < MAX_NUMNODES; j++) + slit_distance(i, j) = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; + goto out; + } + + memset(numa_slit, -1, sizeof(numa_slit)); + for (i = 0; i < slit_table->locality_count; i++) { + if (!pxm_bit_test(i)) + continue; + node_from = pxm_to_node(i); + for (j = 0; j < slit_table->locality_count; j++) { + if (!pxm_bit_test(j)) + continue; + node_to = pxm_to_node(j); + slit_distance(node_from, node_to) = + slit_table->entry[i * slit_table->locality_count + j]; + } + } + +#ifdef SLIT_DEBUG + printk("ACPI 2.0 SLIT locality table:\n"); + for_each_online_node(i) { + for_each_online_node(j) + printk("%03d ", node_distance(i, j)); + printk("\n"); + } +#endif +out: + node_possible_map = node_online_map; +} +#endif /* CONFIG_ACPI_NUMA */ + +/* + * success: return IRQ number (>=0) + * failure: return < 0 + */ +int acpi_register_gsi(struct device *dev, u32 gsi, int triggering, int polarity) +{ + if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) + return gsi; + + if (has_8259 && gsi < 16) + return isa_irq_to_vector(gsi); + + return iosapic_register_intr(gsi, + (polarity == + ACPI_ACTIVE_HIGH) ? IOSAPIC_POL_HIGH : + IOSAPIC_POL_LOW, + (triggering == + ACPI_EDGE_SENSITIVE) ? IOSAPIC_EDGE : + IOSAPIC_LEVEL); +} +EXPORT_SYMBOL_GPL(acpi_register_gsi); + +void acpi_unregister_gsi(u32 gsi) +{ + if (acpi_irq_model == ACPI_IRQ_MODEL_PLATFORM) + return; + + if (has_8259 && gsi < 16) + return; + + iosapic_unregister_intr(gsi); +} +EXPORT_SYMBOL_GPL(acpi_unregister_gsi); + +static int __init acpi_parse_fadt(struct acpi_table_header *table) +{ + struct acpi_table_header *fadt_header; + struct acpi_table_fadt *fadt; + + fadt_header = (struct acpi_table_header *)table; + if (fadt_header->revision != 3) + return -ENODEV; /* Only deal with ACPI 2.0 FADT */ + + fadt = (struct acpi_table_fadt *)fadt_header; + + acpi_register_gsi(NULL, fadt->sci_interrupt, ACPI_LEVEL_SENSITIVE, + ACPI_ACTIVE_LOW); + return 0; +} + +int __init early_acpi_boot_init(void) +{ + int ret; + + /* + * do a partial walk of MADT to determine how many CPUs + * we have including offline CPUs + */ + if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + return 0; + } + + ret = acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_SAPIC, + acpi_parse_lsapic, NR_CPUS); + if (ret < 1) + printk(KERN_ERR PREFIX + "Error parsing MADT - no LAPIC entries\n"); + else + acpi_lapic = 1; + +#ifdef CONFIG_SMP + if (available_cpus == 0) { + printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n"); + printk(KERN_INFO "CPU 0 (0x%04x)", hard_smp_processor_id()); + smp_boot_data.cpu_phys_id[available_cpus] = + hard_smp_processor_id(); + available_cpus = 1; /* We've got at least one of these, no? */ + } + smp_boot_data.cpu_count = available_cpus; +#endif + /* Make boot-up look pretty */ + printk(KERN_INFO "%d CPUs available, %d CPUs total\n", available_cpus, + total_cpus); + + return 0; +} + +int __init acpi_boot_init(void) +{ + + /* + * MADT + * ---- + * Parse the Multiple APIC Description Table (MADT), if exists. + * Note that this table provides platform SMP configuration + * information -- the successor to MPS tables. + */ + + if (acpi_table_parse(ACPI_SIG_MADT, acpi_parse_madt)) { + printk(KERN_ERR PREFIX "Can't find MADT\n"); + goto skip_madt; + } + + /* Local APIC */ + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_LOCAL_APIC_OVERRIDE, acpi_parse_lapic_addr_ovr, 0) < 0) + printk(KERN_ERR PREFIX + "Error parsing LAPIC address override entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_TYPE_LOCAL_APIC_NMI, acpi_parse_lapic_nmi, 0) + < 0) + printk(KERN_ERR PREFIX "Error parsing LAPIC NMI entry\n"); + + /* I/O APIC */ + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_IO_SAPIC, acpi_parse_iosapic, NR_IOSAPICS) < 1) { + printk(KERN_ERR PREFIX + "Error parsing MADT - no IOSAPIC entries\n"); + } + + /* System-Level Interrupt Routing */ + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_INTERRUPT_SOURCE, acpi_parse_plat_int_src, + ACPI_MAX_PLATFORM_INTERRUPTS) < 0) + printk(KERN_ERR PREFIX + "Error parsing platform interrupt source entry\n"); + + if (acpi_table_parse_madt + (ACPI_MADT_TYPE_INTERRUPT_OVERRIDE, acpi_parse_int_src_ovr, 0) < 0) + printk(KERN_ERR PREFIX + "Error parsing interrupt source overrides entry\n"); + + if (acpi_table_parse_madt(ACPI_MADT_TYPE_NMI_SOURCE, acpi_parse_nmi_src, 0) < 0) + printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n"); + skip_madt: + + /* + * FADT says whether a legacy keyboard controller is present. + * The FADT also contains an SCI_INT line, by which the system + * gets interrupts such as power and sleep buttons. If it's not + * on a Legacy interrupt, it needs to be setup. + */ + if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt)) + printk(KERN_ERR PREFIX "Can't find FADT\n"); + +#ifdef CONFIG_ACPI_NUMA +#ifdef CONFIG_SMP + if (srat_num_cpus == 0) { + int cpu, i = 1; + for (cpu = 0; cpu < smp_boot_data.cpu_count; cpu++) + if (smp_boot_data.cpu_phys_id[cpu] != + hard_smp_processor_id()) + node_cpuid[i++].phys_id = + smp_boot_data.cpu_phys_id[cpu]; + } +#endif + build_cpu_to_node_map(); +#endif + return 0; +} + +int acpi_gsi_to_irq(u32 gsi, unsigned int *irq) +{ + int tmp; + + if (has_8259 && gsi < 16) + *irq = isa_irq_to_vector(gsi); + else { + tmp = gsi_to_irq(gsi); + if (tmp == -1) + return -1; + *irq = tmp; + } + return 0; +} + +int acpi_isa_irq_to_gsi(unsigned isa_irq, u32 *gsi) +{ + if (isa_irq >= 16) + return -1; + *gsi = isa_irq; + return 0; +} + +/* + * ACPI based hotplug CPU support + */ +#ifdef CONFIG_ACPI_HOTPLUG_CPU +int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +{ +#ifdef CONFIG_ACPI_NUMA + /* + * We don't have cpu-only-node hotadd. But if the system equips + * SRAT table, pxm is already found and node is ready. + * So, just pxm_to_nid(pxm) is OK. + * This code here is for the system which doesn't have full SRAT + * table for possible cpus. + */ + node_cpuid[cpu].phys_id = physid; + node_cpuid[cpu].nid = acpi_get_node(handle); +#endif + return 0; +} + +int additional_cpus __initdata = -1; + +static __init int setup_additional_cpus(char *s) +{ + if (s) + additional_cpus = simple_strtol(s, NULL, 0); + + return 0; +} + +early_param("additional_cpus", setup_additional_cpus); + +/* + * cpu_possible_mask should be static, it cannot change as CPUs + * are onlined, or offlined. The reason is per-cpu data-structures + * are allocated by some modules at init time, and dont expect to + * do this dynamically on cpu arrival/departure. + * cpu_present_mask on the other hand can change dynamically. + * In case when cpu_hotplug is not compiled, then we resort to current + * behaviour, which is cpu_possible == cpu_present. + * - Ashok Raj + * + * Three ways to find out the number of additional hotplug CPUs: + * - If the BIOS specified disabled CPUs in ACPI/mptables use that. + * - The user can overwrite it with additional_cpus=NUM + * - Otherwise don't reserve additional CPUs. + */ +__init void prefill_possible_map(void) +{ + int i; + int possible, disabled_cpus; + + disabled_cpus = total_cpus - available_cpus; + + if (additional_cpus == -1) { + if (disabled_cpus > 0) + additional_cpus = disabled_cpus; + else + additional_cpus = 0; + } + + possible = available_cpus + additional_cpus; + + if (possible > nr_cpu_ids) + possible = nr_cpu_ids; + + printk(KERN_INFO "SMP: Allowing %d CPUs, %d hotplug CPUs\n", + possible, max((possible - available_cpus), 0)); + + for (i = 0; i < possible; i++) + set_cpu_possible(i, true); +} + +static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) +{ + int cpu; + + cpu = cpumask_first_zero(cpu_present_mask); + if (cpu >= nr_cpu_ids) + return -EINVAL; + + acpi_map_cpu2node(handle, cpu, physid); + + set_cpu_present(cpu, true); + ia64_cpu_to_sapicid[cpu] = physid; + + acpi_processor_set_pdc(handle); + + *pcpu = cpu; + return (0); +} + +/* wrapper to silence section mismatch warning */ +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, + int *pcpu) +{ + return _acpi_map_lsapic(handle, physid, pcpu); +} +EXPORT_SYMBOL(acpi_map_cpu); + +int acpi_unmap_cpu(int cpu) +{ + ia64_cpu_to_sapicid[cpu] = -1; + set_cpu_present(cpu, false); + +#ifdef CONFIG_ACPI_NUMA + /* NUMA specific cleanup's */ +#endif + + return (0); +} +EXPORT_SYMBOL(acpi_unmap_cpu); +#endif /* CONFIG_ACPI_HOTPLUG_CPU */ + +#ifdef CONFIG_ACPI_NUMA +static acpi_status acpi_map_iosapic(acpi_handle handle, u32 depth, + void *context, void **ret) +{ + struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; + union acpi_object *obj; + struct acpi_madt_io_sapic *iosapic; + unsigned int gsi_base; + int node; + + /* Only care about objects w/ a method that returns the MADT */ + if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) + return AE_OK; + + if (!buffer.length || !buffer.pointer) + return AE_OK; + + obj = buffer.pointer; + if (obj->type != ACPI_TYPE_BUFFER || + obj->buffer.length < sizeof(*iosapic)) { + kfree(buffer.pointer); + return AE_OK; + } + + iosapic = (struct acpi_madt_io_sapic *)obj->buffer.pointer; + + if (iosapic->header.type != ACPI_MADT_TYPE_IO_SAPIC) { + kfree(buffer.pointer); + return AE_OK; + } + + gsi_base = iosapic->global_irq_base; + + kfree(buffer.pointer); + + /* OK, it's an IOSAPIC MADT entry; associate it with a node */ + node = acpi_get_node(handle); + if (node == NUMA_NO_NODE || !node_online(node) || + cpumask_empty(cpumask_of_node(node))) + return AE_OK; + + /* We know a gsi to node mapping! */ + map_iosapic_to_node(gsi_base, node); + return AE_OK; +} + +static int __init +acpi_map_iosapics (void) +{ + acpi_get_devices(NULL, acpi_map_iosapic, NULL, NULL); + return 0; +} + +fs_initcall(acpi_map_iosapics); +#endif /* CONFIG_ACPI_NUMA */ + +int __ref acpi_register_ioapic(acpi_handle handle, u64 phys_addr, u32 gsi_base) +{ + int err; + + if ((err = iosapic_init(phys_addr, gsi_base))) + return err; + +#ifdef CONFIG_ACPI_NUMA + acpi_map_iosapic(handle, 0, NULL, NULL); +#endif /* CONFIG_ACPI_NUMA */ + + return 0; +} + +EXPORT_SYMBOL(acpi_register_ioapic); + +int acpi_unregister_ioapic(acpi_handle handle, u32 gsi_base) +{ + return iosapic_remove(gsi_base); +} + +EXPORT_SYMBOL(acpi_unregister_ioapic); + +/* + * acpi_suspend_lowlevel() - save kernel state and suspend. + * + * TBD when IA64 starts to support suspend... + */ +int acpi_suspend_lowlevel(void) { return 0; } + +void acpi_proc_quirk_mwait_check(void) +{ +} diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c new file mode 100644 index 000000000000..be3b90fef2e9 --- /dev/null +++ b/arch/ia64/kernel/asm-offsets.c @@ -0,0 +1,289 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Generate definitions needed by assembly language modules. + * This code generates raw asm output which is post-processed + * to extract and format the required data. + */ + +#define ASM_OFFSETS_C 1 + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kernel/sigframe.h" +#include "../kernel/fsyscall_gtod_data.h" + +void foo(void) +{ + DEFINE(IA64_TASK_SIZE, sizeof (struct task_struct)); + DEFINE(IA64_THREAD_INFO_SIZE, sizeof (struct thread_info)); + DEFINE(IA64_PT_REGS_SIZE, sizeof (struct pt_regs)); + DEFINE(IA64_SWITCH_STACK_SIZE, sizeof (struct switch_stack)); + DEFINE(IA64_SIGINFO_SIZE, sizeof (struct siginfo)); + DEFINE(IA64_CPU_SIZE, sizeof (struct cpuinfo_ia64)); + DEFINE(SIGFRAME_SIZE, sizeof (struct sigframe)); + DEFINE(UNW_FRAME_INFO_SIZE, sizeof (struct unw_frame_info)); + + BUILD_BUG_ON(sizeof(struct upid) != 16); + DEFINE(IA64_UPID_SHIFT, 4); + + BLANK(); + + DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); + DEFINE(TI_CPU, offsetof(struct thread_info, cpu)); + DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count)); +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + DEFINE(TI_AC_STAMP, offsetof(struct thread_info, ac_stamp)); + DEFINE(TI_AC_LEAVE, offsetof(struct thread_info, ac_leave)); + DEFINE(TI_AC_STIME, offsetof(struct thread_info, ac_stime)); + DEFINE(TI_AC_UTIME, offsetof(struct thread_info, ac_utime)); +#endif + + BLANK(); + + DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); + DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); + DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid)); + DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); + DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); + DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); + DEFINE(IA64_TASK_PID_OFFSET, offsetof (struct task_struct, pid)); + DEFINE(IA64_TASK_REAL_PARENT_OFFSET, offsetof (struct task_struct, real_parent)); + DEFINE(IA64_TASK_SIGNAL_OFFSET,offsetof (struct task_struct, signal)); + DEFINE(IA64_TASK_TGID_OFFSET, offsetof (struct task_struct, tgid)); + DEFINE(IA64_TASK_THREAD_KSP_OFFSET, offsetof (struct task_struct, thread.ksp)); + DEFINE(IA64_TASK_THREAD_ON_USTACK_OFFSET, offsetof (struct task_struct, thread.on_ustack)); + + BLANK(); + + + DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, + group_stop_count)); + DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); + DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID])); + + BLANK(); + + DEFINE(IA64_PT_REGS_B6_OFFSET, offsetof (struct pt_regs, b6)); + DEFINE(IA64_PT_REGS_B7_OFFSET, offsetof (struct pt_regs, b7)); + DEFINE(IA64_PT_REGS_AR_CSD_OFFSET, offsetof (struct pt_regs, ar_csd)); + DEFINE(IA64_PT_REGS_AR_SSD_OFFSET, offsetof (struct pt_regs, ar_ssd)); + DEFINE(IA64_PT_REGS_R8_OFFSET, offsetof (struct pt_regs, r8)); + DEFINE(IA64_PT_REGS_R9_OFFSET, offsetof (struct pt_regs, r9)); + DEFINE(IA64_PT_REGS_R10_OFFSET, offsetof (struct pt_regs, r10)); + DEFINE(IA64_PT_REGS_R11_OFFSET, offsetof (struct pt_regs, r11)); + DEFINE(IA64_PT_REGS_CR_IPSR_OFFSET, offsetof (struct pt_regs, cr_ipsr)); + DEFINE(IA64_PT_REGS_CR_IIP_OFFSET, offsetof (struct pt_regs, cr_iip)); + DEFINE(IA64_PT_REGS_CR_IFS_OFFSET, offsetof (struct pt_regs, cr_ifs)); + DEFINE(IA64_PT_REGS_AR_UNAT_OFFSET, offsetof (struct pt_regs, ar_unat)); + DEFINE(IA64_PT_REGS_AR_PFS_OFFSET, offsetof (struct pt_regs, ar_pfs)); + DEFINE(IA64_PT_REGS_AR_RSC_OFFSET, offsetof (struct pt_regs, ar_rsc)); + DEFINE(IA64_PT_REGS_AR_RNAT_OFFSET, offsetof (struct pt_regs, ar_rnat)); + + DEFINE(IA64_PT_REGS_AR_BSPSTORE_OFFSET, offsetof (struct pt_regs, ar_bspstore)); + DEFINE(IA64_PT_REGS_PR_OFFSET, offsetof (struct pt_regs, pr)); + DEFINE(IA64_PT_REGS_B0_OFFSET, offsetof (struct pt_regs, b0)); + DEFINE(IA64_PT_REGS_LOADRS_OFFSET, offsetof (struct pt_regs, loadrs)); + DEFINE(IA64_PT_REGS_R1_OFFSET, offsetof (struct pt_regs, r1)); + DEFINE(IA64_PT_REGS_R12_OFFSET, offsetof (struct pt_regs, r12)); + DEFINE(IA64_PT_REGS_R13_OFFSET, offsetof (struct pt_regs, r13)); + DEFINE(IA64_PT_REGS_AR_FPSR_OFFSET, offsetof (struct pt_regs, ar_fpsr)); + DEFINE(IA64_PT_REGS_R15_OFFSET, offsetof (struct pt_regs, r15)); + DEFINE(IA64_PT_REGS_R14_OFFSET, offsetof (struct pt_regs, r14)); + DEFINE(IA64_PT_REGS_R2_OFFSET, offsetof (struct pt_regs, r2)); + DEFINE(IA64_PT_REGS_R3_OFFSET, offsetof (struct pt_regs, r3)); + DEFINE(IA64_PT_REGS_R16_OFFSET, offsetof (struct pt_regs, r16)); + DEFINE(IA64_PT_REGS_R17_OFFSET, offsetof (struct pt_regs, r17)); + DEFINE(IA64_PT_REGS_R18_OFFSET, offsetof (struct pt_regs, r18)); + DEFINE(IA64_PT_REGS_R19_OFFSET, offsetof (struct pt_regs, r19)); + DEFINE(IA64_PT_REGS_R20_OFFSET, offsetof (struct pt_regs, r20)); + DEFINE(IA64_PT_REGS_R21_OFFSET, offsetof (struct pt_regs, r21)); + DEFINE(IA64_PT_REGS_R22_OFFSET, offsetof (struct pt_regs, r22)); + DEFINE(IA64_PT_REGS_R23_OFFSET, offsetof (struct pt_regs, r23)); + DEFINE(IA64_PT_REGS_R24_OFFSET, offsetof (struct pt_regs, r24)); + DEFINE(IA64_PT_REGS_R25_OFFSET, offsetof (struct pt_regs, r25)); + DEFINE(IA64_PT_REGS_R26_OFFSET, offsetof (struct pt_regs, r26)); + DEFINE(IA64_PT_REGS_R27_OFFSET, offsetof (struct pt_regs, r27)); + DEFINE(IA64_PT_REGS_R28_OFFSET, offsetof (struct pt_regs, r28)); + DEFINE(IA64_PT_REGS_R29_OFFSET, offsetof (struct pt_regs, r29)); + DEFINE(IA64_PT_REGS_R30_OFFSET, offsetof (struct pt_regs, r30)); + DEFINE(IA64_PT_REGS_R31_OFFSET, offsetof (struct pt_regs, r31)); + DEFINE(IA64_PT_REGS_AR_CCV_OFFSET, offsetof (struct pt_regs, ar_ccv)); + DEFINE(IA64_PT_REGS_F6_OFFSET, offsetof (struct pt_regs, f6)); + DEFINE(IA64_PT_REGS_F7_OFFSET, offsetof (struct pt_regs, f7)); + DEFINE(IA64_PT_REGS_F8_OFFSET, offsetof (struct pt_regs, f8)); + DEFINE(IA64_PT_REGS_F9_OFFSET, offsetof (struct pt_regs, f9)); + DEFINE(IA64_PT_REGS_F10_OFFSET, offsetof (struct pt_regs, f10)); + DEFINE(IA64_PT_REGS_F11_OFFSET, offsetof (struct pt_regs, f11)); + + BLANK(); + + DEFINE(IA64_SWITCH_STACK_CALLER_UNAT_OFFSET, offsetof (struct switch_stack, caller_unat)); + DEFINE(IA64_SWITCH_STACK_AR_FPSR_OFFSET, offsetof (struct switch_stack, ar_fpsr)); + DEFINE(IA64_SWITCH_STACK_F2_OFFSET, offsetof (struct switch_stack, f2)); + DEFINE(IA64_SWITCH_STACK_F3_OFFSET, offsetof (struct switch_stack, f3)); + DEFINE(IA64_SWITCH_STACK_F4_OFFSET, offsetof (struct switch_stack, f4)); + DEFINE(IA64_SWITCH_STACK_F5_OFFSET, offsetof (struct switch_stack, f5)); + DEFINE(IA64_SWITCH_STACK_F12_OFFSET, offsetof (struct switch_stack, f12)); + DEFINE(IA64_SWITCH_STACK_F13_OFFSET, offsetof (struct switch_stack, f13)); + DEFINE(IA64_SWITCH_STACK_F14_OFFSET, offsetof (struct switch_stack, f14)); + DEFINE(IA64_SWITCH_STACK_F15_OFFSET, offsetof (struct switch_stack, f15)); + DEFINE(IA64_SWITCH_STACK_F16_OFFSET, offsetof (struct switch_stack, f16)); + DEFINE(IA64_SWITCH_STACK_F17_OFFSET, offsetof (struct switch_stack, f17)); + DEFINE(IA64_SWITCH_STACK_F18_OFFSET, offsetof (struct switch_stack, f18)); + DEFINE(IA64_SWITCH_STACK_F19_OFFSET, offsetof (struct switch_stack, f19)); + DEFINE(IA64_SWITCH_STACK_F20_OFFSET, offsetof (struct switch_stack, f20)); + DEFINE(IA64_SWITCH_STACK_F21_OFFSET, offsetof (struct switch_stack, f21)); + DEFINE(IA64_SWITCH_STACK_F22_OFFSET, offsetof (struct switch_stack, f22)); + DEFINE(IA64_SWITCH_STACK_F23_OFFSET, offsetof (struct switch_stack, f23)); + DEFINE(IA64_SWITCH_STACK_F24_OFFSET, offsetof (struct switch_stack, f24)); + DEFINE(IA64_SWITCH_STACK_F25_OFFSET, offsetof (struct switch_stack, f25)); + DEFINE(IA64_SWITCH_STACK_F26_OFFSET, offsetof (struct switch_stack, f26)); + DEFINE(IA64_SWITCH_STACK_F27_OFFSET, offsetof (struct switch_stack, f27)); + DEFINE(IA64_SWITCH_STACK_F28_OFFSET, offsetof (struct switch_stack, f28)); + DEFINE(IA64_SWITCH_STACK_F29_OFFSET, offsetof (struct switch_stack, f29)); + DEFINE(IA64_SWITCH_STACK_F30_OFFSET, offsetof (struct switch_stack, f30)); + DEFINE(IA64_SWITCH_STACK_F31_OFFSET, offsetof (struct switch_stack, f31)); + DEFINE(IA64_SWITCH_STACK_R4_OFFSET, offsetof (struct switch_stack, r4)); + DEFINE(IA64_SWITCH_STACK_R5_OFFSET, offsetof (struct switch_stack, r5)); + DEFINE(IA64_SWITCH_STACK_R6_OFFSET, offsetof (struct switch_stack, r6)); + DEFINE(IA64_SWITCH_STACK_R7_OFFSET, offsetof (struct switch_stack, r7)); + DEFINE(IA64_SWITCH_STACK_B0_OFFSET, offsetof (struct switch_stack, b0)); + DEFINE(IA64_SWITCH_STACK_B1_OFFSET, offsetof (struct switch_stack, b1)); + DEFINE(IA64_SWITCH_STACK_B2_OFFSET, offsetof (struct switch_stack, b2)); + DEFINE(IA64_SWITCH_STACK_B3_OFFSET, offsetof (struct switch_stack, b3)); + DEFINE(IA64_SWITCH_STACK_B4_OFFSET, offsetof (struct switch_stack, b4)); + DEFINE(IA64_SWITCH_STACK_B5_OFFSET, offsetof (struct switch_stack, b5)); + DEFINE(IA64_SWITCH_STACK_AR_PFS_OFFSET, offsetof (struct switch_stack, ar_pfs)); + DEFINE(IA64_SWITCH_STACK_AR_LC_OFFSET, offsetof (struct switch_stack, ar_lc)); + DEFINE(IA64_SWITCH_STACK_AR_UNAT_OFFSET, offsetof (struct switch_stack, ar_unat)); + DEFINE(IA64_SWITCH_STACK_AR_RNAT_OFFSET, offsetof (struct switch_stack, ar_rnat)); + DEFINE(IA64_SWITCH_STACK_AR_BSPSTORE_OFFSET, offsetof (struct switch_stack, ar_bspstore)); + DEFINE(IA64_SWITCH_STACK_PR_OFFSET, offsetof (struct switch_stack, pr)); + + BLANK(); + + DEFINE(IA64_SIGCONTEXT_IP_OFFSET, offsetof (struct sigcontext, sc_ip)); + DEFINE(IA64_SIGCONTEXT_AR_BSP_OFFSET, offsetof (struct sigcontext, sc_ar_bsp)); + DEFINE(IA64_SIGCONTEXT_AR_FPSR_OFFSET, offsetof (struct sigcontext, sc_ar_fpsr)); + DEFINE(IA64_SIGCONTEXT_AR_RNAT_OFFSET, offsetof (struct sigcontext, sc_ar_rnat)); + DEFINE(IA64_SIGCONTEXT_AR_UNAT_OFFSET, offsetof (struct sigcontext, sc_ar_unat)); + DEFINE(IA64_SIGCONTEXT_B0_OFFSET, offsetof (struct sigcontext, sc_br[0])); + DEFINE(IA64_SIGCONTEXT_CFM_OFFSET, offsetof (struct sigcontext, sc_cfm)); + DEFINE(IA64_SIGCONTEXT_FLAGS_OFFSET, offsetof (struct sigcontext, sc_flags)); + DEFINE(IA64_SIGCONTEXT_FR6_OFFSET, offsetof (struct sigcontext, sc_fr[6])); + DEFINE(IA64_SIGCONTEXT_PR_OFFSET, offsetof (struct sigcontext, sc_pr)); + DEFINE(IA64_SIGCONTEXT_R12_OFFSET, offsetof (struct sigcontext, sc_gr[12])); + DEFINE(IA64_SIGCONTEXT_RBS_BASE_OFFSET,offsetof (struct sigcontext, sc_rbs_base)); + DEFINE(IA64_SIGCONTEXT_LOADRS_OFFSET, offsetof (struct sigcontext, sc_loadrs)); + + BLANK(); + + DEFINE(IA64_SIGPENDING_SIGNAL_OFFSET, offsetof (struct sigpending, signal)); + + BLANK(); + + DEFINE(IA64_SIGFRAME_ARG0_OFFSET, offsetof (struct sigframe, arg0)); + DEFINE(IA64_SIGFRAME_ARG1_OFFSET, offsetof (struct sigframe, arg1)); + DEFINE(IA64_SIGFRAME_ARG2_OFFSET, offsetof (struct sigframe, arg2)); + DEFINE(IA64_SIGFRAME_HANDLER_OFFSET, offsetof (struct sigframe, handler)); + DEFINE(IA64_SIGFRAME_SIGCONTEXT_OFFSET, offsetof (struct sigframe, sc)); + BLANK(); + /* for assembly files which can't include sched.h: */ + DEFINE(IA64_CLONE_VFORK, CLONE_VFORK); + DEFINE(IA64_CLONE_VM, CLONE_VM); + + BLANK(); + DEFINE(IA64_CPUINFO_NSEC_PER_CYC_OFFSET, + offsetof (struct cpuinfo_ia64, nsec_per_cyc)); + DEFINE(IA64_CPUINFO_PTCE_BASE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_base)); + DEFINE(IA64_CPUINFO_PTCE_COUNT_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_count)); + DEFINE(IA64_CPUINFO_PTCE_STRIDE_OFFSET, + offsetof (struct cpuinfo_ia64, ptce_stride)); + BLANK(); + DEFINE(IA64_TIMESPEC_TV_NSEC_OFFSET, + offsetof (struct __kernel_old_timespec, tv_nsec)); + DEFINE(IA64_TIME_SN_SPEC_SNSEC_OFFSET, + offsetof (struct time_sn_spec, snsec)); + + DEFINE(CLONE_SETTLS_BIT, 19); +#if CLONE_SETTLS != (1<<19) +# error "CLONE_SETTLS_BIT incorrect, please fix" +#endif + + BLANK(); + DEFINE(IA64_MCA_CPU_MCA_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, mca_stack)); + DEFINE(IA64_MCA_CPU_INIT_STACK_OFFSET, + offsetof (struct ia64_mca_cpu, init_stack)); + BLANK(); + DEFINE(IA64_SAL_OS_STATE_OS_GP_OFFSET, + offsetof (struct ia64_sal_os_state, os_gp)); + DEFINE(IA64_SAL_OS_STATE_PROC_STATE_PARAM_OFFSET, + offsetof (struct ia64_sal_os_state, proc_state_param)); + DEFINE(IA64_SAL_OS_STATE_SAL_RA_OFFSET, + offsetof (struct ia64_sal_os_state, sal_ra)); + DEFINE(IA64_SAL_OS_STATE_SAL_GP_OFFSET, + offsetof (struct ia64_sal_os_state, sal_gp)); + DEFINE(IA64_SAL_OS_STATE_PAL_MIN_STATE_OFFSET, + offsetof (struct ia64_sal_os_state, pal_min_state)); + DEFINE(IA64_SAL_OS_STATE_OS_STATUS_OFFSET, + offsetof (struct ia64_sal_os_state, os_status)); + DEFINE(IA64_SAL_OS_STATE_CONTEXT_OFFSET, + offsetof (struct ia64_sal_os_state, context)); + DEFINE(IA64_SAL_OS_STATE_SIZE, + sizeof (struct ia64_sal_os_state)); + BLANK(); + + DEFINE(IA64_PMSA_GR_OFFSET, + offsetof(struct pal_min_state_area, pmsa_gr)); + DEFINE(IA64_PMSA_BANK1_GR_OFFSET, + offsetof(struct pal_min_state_area, pmsa_bank1_gr)); + DEFINE(IA64_PMSA_PR_OFFSET, + offsetof(struct pal_min_state_area, pmsa_pr)); + DEFINE(IA64_PMSA_BR0_OFFSET, + offsetof(struct pal_min_state_area, pmsa_br0)); + DEFINE(IA64_PMSA_RSC_OFFSET, + offsetof(struct pal_min_state_area, pmsa_rsc)); + DEFINE(IA64_PMSA_IIP_OFFSET, + offsetof(struct pal_min_state_area, pmsa_iip)); + DEFINE(IA64_PMSA_IPSR_OFFSET, + offsetof(struct pal_min_state_area, pmsa_ipsr)); + DEFINE(IA64_PMSA_IFS_OFFSET, + offsetof(struct pal_min_state_area, pmsa_ifs)); + DEFINE(IA64_PMSA_XIP_OFFSET, + offsetof(struct pal_min_state_area, pmsa_xip)); + BLANK(); + + /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */ + DEFINE(IA64_GTOD_SEQ_OFFSET, + offsetof (struct fsyscall_gtod_data_t, seq)); + DEFINE(IA64_GTOD_WALL_TIME_OFFSET, + offsetof (struct fsyscall_gtod_data_t, wall_time)); + DEFINE(IA64_GTOD_MONO_TIME_OFFSET, + offsetof (struct fsyscall_gtod_data_t, monotonic_time)); + DEFINE(IA64_CLKSRC_MASK_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_mask)); + DEFINE(IA64_CLKSRC_MULT_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_mult)); + DEFINE(IA64_CLKSRC_SHIFT_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_shift)); + DEFINE(IA64_CLKSRC_MMIO_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_fsys_mmio)); + DEFINE(IA64_CLKSRC_CYCLE_LAST_OFFSET, + offsetof (struct fsyscall_gtod_data_t, clk_cycle_last)); + DEFINE(IA64_ITC_JITTER_OFFSET, + offsetof (struct itc_jitter_data_t, itc_jitter)); + DEFINE(IA64_ITC_LASTCYCLE_OFFSET, + offsetof (struct itc_jitter_data_t, itc_lastcycle)); + +} diff --git a/arch/ia64/kernel/audit.c b/arch/ia64/kernel/audit.c new file mode 100644 index 000000000000..ec61f20ca61f --- /dev/null +++ b/arch/ia64/kernel/audit.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +static unsigned dir_class[] = { +#include +~0U +}; + +static unsigned read_class[] = { +#include +~0U +}; + +static unsigned write_class[] = { +#include +~0U +}; + +static unsigned chattr_class[] = { +#include +~0U +}; + +static unsigned signal_class[] = { +#include +~0U +}; + +int audit_classify_arch(int arch) +{ + return 0; +} + +int audit_classify_syscall(int abi, unsigned syscall) +{ + switch(syscall) { + case __NR_open: + return AUDITSC_OPEN; + case __NR_openat: + return AUDITSC_OPENAT; + case __NR_execve: + return AUDITSC_EXECVE; + case __NR_openat2: + return AUDITSC_OPENAT2; + default: + return AUDITSC_NATIVE; + } +} + +static int __init audit_classes_init(void) +{ + audit_register_class(AUDIT_CLASS_WRITE, write_class); + audit_register_class(AUDIT_CLASS_READ, read_class); + audit_register_class(AUDIT_CLASS_DIR_WRITE, dir_class); + audit_register_class(AUDIT_CLASS_CHATTR, chattr_class); + audit_register_class(AUDIT_CLASS_SIGNAL, signal_class); + return 0; +} + +__initcall(audit_classes_init); diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c new file mode 100644 index 000000000000..782c481d7052 --- /dev/null +++ b/arch/ia64/kernel/brl_emu.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Emulation of the "brl" instruction for IA64 processors that + * don't support it in hardware. + * Author: Stephan Zeisset, Intel Corp. + * + * 02/22/02 D. Mosberger Clear si_flgs, si_isr, and si_imm to avoid + * leaking kernel bits. + */ + +#include +#include +#include +#include + +extern char ia64_set_b1, ia64_set_b2, ia64_set_b3, ia64_set_b4, ia64_set_b5; + +struct illegal_op_return { + unsigned long fkt, arg1, arg2, arg3; +}; + +/* + * The unimplemented bits of a virtual address must be set + * to the value of the most significant implemented bit. + * unimpl_va_mask includes all unimplemented bits and + * the most significant implemented bit, so the result + * of an and operation with the mask must be all 0's + * or all 1's for the address to be valid. + */ +#define unimplemented_virtual_address(va) ( \ + ((va) & local_cpu_data->unimpl_va_mask) != 0 && \ + ((va) & local_cpu_data->unimpl_va_mask) != local_cpu_data->unimpl_va_mask \ +) + +/* + * The unimplemented bits of a physical address must be 0. + * unimpl_pa_mask includes all unimplemented bits, so the result + * of an and operation with the mask must be all 0's for the + * address to be valid. + */ +#define unimplemented_physical_address(pa) ( \ + ((pa) & local_cpu_data->unimpl_pa_mask) != 0 \ +) + +/* + * Handle an illegal operation fault that was caused by an + * unimplemented "brl" instruction. + * If we are not successful (e.g because the illegal operation + * wasn't caused by a "brl" after all), we return -1. + * If we are successful, we return either 0 or the address + * of a "fixup" function for manipulating preserved register + * state. + */ + +struct illegal_op_return +ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) +{ + unsigned long bundle[2]; + unsigned long opcode, btype, qp, offset, cpl; + unsigned long next_ip; + struct illegal_op_return rv; + long tmp_taken, unimplemented_address; + + rv.fkt = (unsigned long) -1; + + /* + * Decode the instruction bundle. + */ + + if (copy_from_user(bundle, (void *) (regs->cr_iip), sizeof(bundle))) + return rv; + + next_ip = (unsigned long) regs->cr_iip + 16; + + /* "brl" must be in slot 2. */ + if (ia64_psr(regs)->ri != 1) return rv; + + /* Must be "mlx" template */ + if ((bundle[0] & 0x1e) != 0x4) return rv; + + opcode = (bundle[1] >> 60); + btype = ((bundle[1] >> 29) & 0x7); + qp = ((bundle[1] >> 23) & 0x3f); + offset = ((bundle[1] & 0x0800000000000000L) << 4) + | ((bundle[1] & 0x00fffff000000000L) >> 32) + | ((bundle[1] & 0x00000000007fffffL) << 40) + | ((bundle[0] & 0xffff000000000000L) >> 24); + + tmp_taken = regs->pr & (1L << qp); + + switch(opcode) { + + case 0xC: + /* + * Long Branch. + */ + if (btype != 0) return rv; + rv.fkt = 0; + if (!(tmp_taken)) { + /* + * Qualifying predicate is 0. + * Skip instruction. + */ + regs->cr_iip = next_ip; + ia64_psr(regs)->ri = 0; + return rv; + } + break; + + case 0xD: + /* + * Long Call. + */ + rv.fkt = 0; + if (!(tmp_taken)) { + /* + * Qualifying predicate is 0. + * Skip instruction. + */ + regs->cr_iip = next_ip; + ia64_psr(regs)->ri = 0; + return rv; + } + + /* + * BR[btype] = IP+16 + */ + switch(btype) { + case 0: + regs->b0 = next_ip; + break; + case 1: + rv.fkt = (unsigned long) &ia64_set_b1; + break; + case 2: + rv.fkt = (unsigned long) &ia64_set_b2; + break; + case 3: + rv.fkt = (unsigned long) &ia64_set_b3; + break; + case 4: + rv.fkt = (unsigned long) &ia64_set_b4; + break; + case 5: + rv.fkt = (unsigned long) &ia64_set_b5; + break; + case 6: + regs->b6 = next_ip; + break; + case 7: + regs->b7 = next_ip; + break; + } + rv.arg1 = next_ip; + + /* + * AR[PFS].pfm = CFM + * AR[PFS].pec = AR[EC] + * AR[PFS].ppl = PSR.cpl + */ + cpl = ia64_psr(regs)->cpl; + regs->ar_pfs = ((regs->cr_ifs & 0x3fffffffff) + | (ar_ec << 52) | (cpl << 62)); + + /* + * CFM.sof -= CFM.sol + * CFM.sol = 0 + * CFM.sor = 0 + * CFM.rrb.gr = 0 + * CFM.rrb.fr = 0 + * CFM.rrb.pr = 0 + */ + regs->cr_ifs = ((regs->cr_ifs & 0xffffffc00000007f) + - ((regs->cr_ifs >> 7) & 0x7f)); + + break; + + default: + /* + * Unknown opcode. + */ + return rv; + + } + + regs->cr_iip += offset; + ia64_psr(regs)->ri = 0; + + if (ia64_psr(regs)->it == 0) + unimplemented_address = unimplemented_physical_address(regs->cr_iip); + else + unimplemented_address = unimplemented_virtual_address(regs->cr_iip); + + if (unimplemented_address) { + /* + * The target address contains unimplemented bits. + */ + printk(KERN_DEBUG "Woah! Unimplemented Instruction Address Trap!\n"); + force_sig_fault(SIGILL, ILL_BADIADDR, (void __user *)NULL, + 0, 0, 0); + } else if (ia64_psr(regs)->tb) { + /* + * Branch Tracing is enabled. + * Force a taken branch signal. + */ + force_sig_fault(SIGTRAP, TRAP_BRANCH, (void __user *)NULL, + 0, 0, 0); + } else if (ia64_psr(regs)->ss) { + /* + * Single Step is enabled. + * Force a trace signal. + */ + force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)NULL, + 0, 0, 0); + } + return rv; +} diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c new file mode 100644 index 000000000000..88b3ce3e66cd --- /dev/null +++ b/arch/ia64/kernel/crash.c @@ -0,0 +1,257 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/ia64/kernel/crash.c + * + * Architecture specific (ia64) functions for kexec based crash dumps. + * + * Created by: Khalid Aziz + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Intel Corp Zou Nan hai + * + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +int kdump_status[NR_CPUS]; +static atomic_t kdump_cpu_frozen; +atomic_t kdump_in_progress; +static int kdump_freeze_monarch; +static int kdump_on_init = 1; +static int kdump_on_fatal_mca = 1; + +extern void ia64_dump_cpu_regs(void *); + +static DEFINE_PER_CPU(struct elf_prstatus, elf_prstatus); + +void +crash_save_this_cpu(void) +{ + void *buf; + unsigned long cfm, sof, sol; + + int cpu = smp_processor_id(); + struct elf_prstatus *prstatus = &per_cpu(elf_prstatus, cpu); + + elf_greg_t *dst = (elf_greg_t *)&(prstatus->pr_reg); + memset(prstatus, 0, sizeof(*prstatus)); + prstatus->common.pr_pid = current->pid; + + ia64_dump_cpu_regs(dst); + cfm = dst[43]; + sol = (cfm >> 7) & 0x7f; + sof = cfm & 0x7f; + dst[46] = (unsigned long)ia64_rse_skip_regs((unsigned long *)dst[46], + sof - sol); + + buf = (u64 *) per_cpu_ptr(crash_notes, cpu); + if (!buf) + return; + buf = append_elf_note(buf, KEXEC_CORE_NOTE_NAME, NT_PRSTATUS, prstatus, + sizeof(*prstatus)); + final_note(buf); +} + +#ifdef CONFIG_SMP +static int +kdump_wait_cpu_freeze(void) +{ + int cpu_num = num_online_cpus() - 1; + int timeout = 1000; + while(timeout-- > 0) { + if (atomic_read(&kdump_cpu_frozen) == cpu_num) + return 0; + udelay(1000); + } + return 1; +} +#endif + +void +machine_crash_shutdown(struct pt_regs *pt) +{ + /* This function is only called after the system + * has paniced or is otherwise in a critical state. + * The minimum amount of code to allow a kexec'd kernel + * to run successfully needs to happen here. + * + * In practice this means shooting down the other cpus in + * an SMP system. + */ + kexec_disable_iosapic(); +#ifdef CONFIG_SMP + /* + * If kdump_on_init is set and an INIT is asserted here, kdump will + * be started again via INIT monarch. + */ + local_irq_disable(); + ia64_set_psr_mc(); /* mask MCA/INIT */ + if (atomic_inc_return(&kdump_in_progress) != 1) + unw_init_running(kdump_cpu_freeze, NULL); + + /* + * Now this cpu is ready for kdump. + * Stop all others by IPI or INIT. They could receive INIT from + * outside and might be INIT monarch, but only thing they have to + * do is falling into kdump_cpu_freeze(). + * + * If an INIT is asserted here: + * - All receivers might be slaves, since some of cpus could already + * be frozen and INIT might be masked on monarch. In this case, + * all slaves will be frozen soon since kdump_in_progress will let + * them into DIE_INIT_SLAVE_LEAVE. + * - One might be a monarch, but INIT rendezvous will fail since + * at least this cpu already have INIT masked so it never join + * to the rendezvous. In this case, all slaves and monarch will + * be frozen soon with no wait since the INIT rendezvous is skipped + * by kdump_in_progress. + */ + kdump_smp_send_stop(); + /* not all cpu response to IPI, send INIT to freeze them */ + if (kdump_wait_cpu_freeze()) { + kdump_smp_send_init(); + /* wait again, don't go ahead if possible */ + kdump_wait_cpu_freeze(); + } +#endif +} + +static void +machine_kdump_on_init(void) +{ + crash_save_vmcoreinfo(); + local_irq_disable(); + kexec_disable_iosapic(); + machine_kexec(ia64_kimage); +} + +void +kdump_cpu_freeze(struct unw_frame_info *info, void *arg) +{ + int cpuid; + + local_irq_disable(); + cpuid = smp_processor_id(); + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + + ia64_set_psr_mc(); /* mask MCA/INIT and stop reentrance */ + + atomic_inc(&kdump_cpu_frozen); + kdump_status[cpuid] = 1; + mb(); + for (;;) + cpu_relax(); +} + +static int +kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) +{ + struct ia64_mca_notify_die *nd; + struct die_args *args = data; + + if (atomic_read(&kdump_in_progress)) { + switch (val) { + case DIE_INIT_MONARCH_LEAVE: + if (!kdump_freeze_monarch) + break; + fallthrough; + case DIE_INIT_SLAVE_LEAVE: + case DIE_INIT_MONARCH_ENTER: + case DIE_MCA_RENDZVOUS_LEAVE: + unw_init_running(kdump_cpu_freeze, NULL); + break; + } + } + + if (!kdump_on_init && !kdump_on_fatal_mca) + return NOTIFY_DONE; + + if (!ia64_kimage) { + if (val == DIE_INIT_MONARCH_LEAVE) + ia64_mca_printk(KERN_NOTICE + "%s: kdump not configured\n", + __func__); + return NOTIFY_DONE; + } + + if (val != DIE_INIT_MONARCH_LEAVE && + val != DIE_INIT_MONARCH_PROCESS && + val != DIE_MCA_MONARCH_LEAVE) + return NOTIFY_DONE; + + nd = (struct ia64_mca_notify_die *)args->err; + + switch (val) { + case DIE_INIT_MONARCH_PROCESS: + /* Reason code 1 means machine check rendezvous*/ + if (kdump_on_init && (nd->sos->rv_rc != 1)) { + if (atomic_inc_return(&kdump_in_progress) != 1) + kdump_freeze_monarch = 1; + } + break; + case DIE_INIT_MONARCH_LEAVE: + /* Reason code 1 means machine check rendezvous*/ + if (kdump_on_init && (nd->sos->rv_rc != 1)) + machine_kdump_on_init(); + break; + case DIE_MCA_MONARCH_LEAVE: + /* *(nd->data) indicate if MCA is recoverable */ + if (kdump_on_fatal_mca && !(*(nd->data))) { + if (atomic_inc_return(&kdump_in_progress) == 1) + machine_kdump_on_init(); + /* We got fatal MCA while kdump!? No way!! */ + } + break; + } + return NOTIFY_DONE; +} + +#ifdef CONFIG_SYSCTL +static struct ctl_table kdump_ctl_table[] = { + { + .procname = "kdump_on_init", + .data = &kdump_on_init, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "kdump_on_fatal_mca", + .data = &kdump_on_fatal_mca, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { } +}; +#endif + +static int +machine_crash_setup(void) +{ + /* be notified before default_monarch_init_process */ + static struct notifier_block kdump_init_notifier_nb = { + .notifier_call = kdump_init_notifier, + .priority = 1, + }; + int ret; + if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) + return ret; +#ifdef CONFIG_SYSCTL + register_sysctl("kernel", kdump_ctl_table); +#endif + return 0; +} + +__initcall(machine_crash_setup); + diff --git a/arch/ia64/kernel/crash_dump.c b/arch/ia64/kernel/crash_dump.c new file mode 100644 index 000000000000..4ef68e2aa757 --- /dev/null +++ b/arch/ia64/kernel/crash_dump.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * kernel/crash_dump.c - Memory preserving reboot related code. + * + * Created by: Simon Horman + * Original code moved from kernel/crash.c + * Original code comment copied from the i386 version of this file + */ + +#include +#include +#include +#include +#include + +ssize_t copy_oldmem_page(struct iov_iter *iter, unsigned long pfn, + size_t csize, unsigned long offset) +{ + void *vaddr; + + if (!csize) + return 0; + vaddr = __va(pfn< +#include +#include +#include +#include +#include +#include + +/* IBM Summit (EXA) Cyclone counter code*/ +#define CYCLONE_CBAR_ADDR 0xFEB00CD0 +#define CYCLONE_PMCC_OFFSET 0x51A0 +#define CYCLONE_MPMC_OFFSET 0x51D0 +#define CYCLONE_MPCS_OFFSET 0x51A8 +#define CYCLONE_TIMER_FREQ 100000000 + +int use_cyclone; +void __init cyclone_setup(void) +{ + use_cyclone = 1; +} + +static void __iomem *cyclone_mc; + +static u64 read_cyclone(struct clocksource *cs) +{ + return (u64)readq((void __iomem *)cyclone_mc); +} + +static struct clocksource clocksource_cyclone = { + .name = "cyclone", + .rating = 300, + .read = read_cyclone, + .mask = (1LL << 40) - 1, + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; + +int __init init_cyclone_clock(void) +{ + u64 __iomem *reg; + u64 base; /* saved cyclone base address */ + u64 offset; /* offset from pageaddr to cyclone_timer register */ + int i; + u32 __iomem *cyclone_timer; /* Cyclone MPMC0 register */ + + if (!use_cyclone) + return 0; + + printk(KERN_INFO "Summit chipset: Starting Cyclone Counter.\n"); + + /* find base address */ + offset = (CYCLONE_CBAR_ADDR); + reg = ioremap(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + base = readq(reg); + iounmap(reg); + if(!base){ + printk(KERN_ERR "Summit chipset: Could not find valid CBAR" + " value.\n"); + use_cyclone = 0; + return -ENODEV; + } + + /* setup PMCC */ + offset = (base + CYCLONE_PMCC_OFFSET); + reg = ioremap(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid PMCC" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* setup MPCS */ + offset = (base + CYCLONE_MPCS_OFFSET); + reg = ioremap(offset, sizeof(u64)); + if(!reg){ + printk(KERN_ERR "Summit chipset: Could not find valid MPCS" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + writel(0x00000001,reg); + iounmap(reg); + + /* map in cyclone_timer */ + offset = (base + CYCLONE_MPMC_OFFSET); + cyclone_timer = ioremap(offset, sizeof(u32)); + if(!cyclone_timer){ + printk(KERN_ERR "Summit chipset: Could not find valid MPMC" + " register.\n"); + use_cyclone = 0; + return -ENODEV; + } + + /*quick test to make sure its ticking*/ + for(i=0; i<3; i++){ + u32 old = readl(cyclone_timer); + int stall = 100; + while(stall--) barrier(); + if(readl(cyclone_timer) == old){ + printk(KERN_ERR "Summit chipset: Counter not counting!" + " DISABLED\n"); + iounmap(cyclone_timer); + cyclone_timer = NULL; + use_cyclone = 0; + return -ENODEV; + } + } + /* initialize last tick */ + cyclone_mc = cyclone_timer; + clocksource_cyclone.archdata.fsys_mmio = cyclone_timer; + clocksource_register_hz(&clocksource_cyclone, CYCLONE_TIMER_FREQ); + + return 0; +} + +__initcall(init_cyclone_clock); diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c new file mode 100644 index 000000000000..cd0c166bfbc2 --- /dev/null +++ b/arch/ia64/kernel/dma-mapping.c @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +/* Set this to 1 if there is a HW IOMMU in the system */ +int iommu_detected __read_mostly; + +const struct dma_map_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); diff --git a/arch/ia64/kernel/efi.c b/arch/ia64/kernel/efi.c new file mode 100644 index 000000000000..033f5aead88a --- /dev/null +++ b/arch/ia64/kernel/efi.c @@ -0,0 +1,1360 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 0.9 + * April 30, 1999 + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2003 Hewlett-Packard Co. + * David Mosberger-Tang + * Stephane Eranian + * (c) Copyright 2006 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + * + * All EFI Runtime Services are not implemented yet as EFI only + * supports physical mode addressing on SoftSDV. This is to be fixed + * in a future version. --drummond 1999-07-20 + * + * Implemented EFI runtime services and virtual mode calls. --davidm + * + * Goutham Rao: + * Skip non-WB memory and ignore empty memory ranges. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define EFI_DEBUG 0 + +#define ESI_TABLE_GUID \ + EFI_GUID(0x43EA58DC, 0xCF28, 0x4b06, 0xB3, \ + 0x91, 0xB7, 0x50, 0x59, 0x34, 0x2B, 0xD4) + +static unsigned long mps_phys = EFI_INVALID_TABLE_ADDR; +static __initdata unsigned long palo_phys; + +unsigned long __initdata esi_phys = EFI_INVALID_TABLE_ADDR; +unsigned long hcdp_phys = EFI_INVALID_TABLE_ADDR; +unsigned long sal_systab_phys = EFI_INVALID_TABLE_ADDR; + +static const efi_config_table_type_t arch_tables[] __initconst = { + {ESI_TABLE_GUID, &esi_phys, "ESI" }, + {HCDP_TABLE_GUID, &hcdp_phys, "HCDP" }, + {MPS_TABLE_GUID, &mps_phys, "MPS" }, + {PROCESSOR_ABSTRACTION_LAYER_OVERWRITE_GUID, &palo_phys, "PALO" }, + {SAL_SYSTEM_TABLE_GUID, &sal_systab_phys, "SALsystab" }, + {}, +}; + +extern efi_status_t efi_call_phys (void *, ...); + +static efi_runtime_services_t *runtime; +static u64 mem_limit = ~0UL, max_addr = ~0UL, min_addr = 0UL; + +#define efi_call_virt(f, args...) (*(f))(args) + +#define STUB_GET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_time (efi_time_t *tm, efi_time_cap_t *tc) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_time_cap_t *atc = NULL; \ + efi_status_t ret; \ + \ + if (tc) \ + atc = adjust_arg(tc); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_time_t *) __va(runtime->get_time), \ + adjust_arg(tm), atc); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_time (efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_set_time_t *) __va(runtime->set_time), \ + adjust_arg(tm)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_WAKEUP_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_wakeup_time (efi_bool_t *enabled, efi_bool_t *pending, \ + efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_get_wakeup_time_t *) __va(runtime->get_wakeup_time), \ + adjust_arg(enabled), adjust_arg(pending), adjust_arg(tm)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_WAKEUP_TIME(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_wakeup_time (efi_bool_t enabled, efi_time_t *tm) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_time_t *atm = NULL; \ + efi_status_t ret; \ + \ + if (tm) \ + atm = adjust_arg(tm); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_set_wakeup_time_t *) __va(runtime->set_wakeup_time), \ + enabled, atm); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_variable (efi_char16_t *name, efi_guid_t *vendor, u32 *attr, \ + unsigned long *data_size, void *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + u32 *aattr = NULL; \ + efi_status_t ret; \ + \ + if (attr) \ + aattr = adjust_arg(attr); \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_get_variable_t *) __va(runtime->get_variable), \ + adjust_arg(name), adjust_arg(vendor), aattr, \ + adjust_arg(data_size), adjust_arg(data)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_NEXT_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_next_variable (unsigned long *name_size, efi_char16_t *name, \ + efi_guid_t *vendor) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_get_next_variable_t *) __va(runtime->get_next_variable), \ + adjust_arg(name_size), adjust_arg(name), adjust_arg(vendor)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_SET_VARIABLE(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_set_variable (efi_char16_t *name, efi_guid_t *vendor, \ + u32 attr, unsigned long data_size, \ + void *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix( \ + (efi_set_variable_t *) __va(runtime->set_variable), \ + adjust_arg(name), adjust_arg(vendor), attr, data_size, \ + adjust_arg(data)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_GET_NEXT_HIGH_MONO_COUNT(prefix, adjust_arg) \ +static efi_status_t \ +prefix##_get_next_high_mono_count (u32 *count) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_status_t ret; \ + \ + ia64_save_scratch_fpregs(fr); \ + ret = efi_call_##prefix((efi_get_next_high_mono_count_t *) \ + __va(runtime->get_next_high_mono_count), \ + adjust_arg(count)); \ + ia64_load_scratch_fpregs(fr); \ + return ret; \ +} + +#define STUB_RESET_SYSTEM(prefix, adjust_arg) \ +static void \ +prefix##_reset_system (int reset_type, efi_status_t status, \ + unsigned long data_size, efi_char16_t *data) \ +{ \ + struct ia64_fpreg fr[6]; \ + efi_char16_t *adata = NULL; \ + \ + if (data) \ + adata = adjust_arg(data); \ + \ + ia64_save_scratch_fpregs(fr); \ + efi_call_##prefix( \ + (efi_reset_system_t *) __va(runtime->reset_system), \ + reset_type, status, data_size, adata); \ + /* should not return, but just in case... */ \ + ia64_load_scratch_fpregs(fr); \ +} + +#define phys_ptr(arg) ((__typeof__(arg)) ia64_tpa(arg)) + +STUB_GET_TIME(phys, phys_ptr) +STUB_SET_TIME(phys, phys_ptr) +STUB_GET_WAKEUP_TIME(phys, phys_ptr) +STUB_SET_WAKEUP_TIME(phys, phys_ptr) +STUB_GET_VARIABLE(phys, phys_ptr) +STUB_GET_NEXT_VARIABLE(phys, phys_ptr) +STUB_SET_VARIABLE(phys, phys_ptr) +STUB_GET_NEXT_HIGH_MONO_COUNT(phys, phys_ptr) +STUB_RESET_SYSTEM(phys, phys_ptr) + +#define id(arg) arg + +STUB_GET_TIME(virt, id) +STUB_SET_TIME(virt, id) +STUB_GET_WAKEUP_TIME(virt, id) +STUB_SET_WAKEUP_TIME(virt, id) +STUB_GET_VARIABLE(virt, id) +STUB_GET_NEXT_VARIABLE(virt, id) +STUB_SET_VARIABLE(virt, id) +STUB_GET_NEXT_HIGH_MONO_COUNT(virt, id) +STUB_RESET_SYSTEM(virt, id) + +void +efi_gettimeofday (struct timespec64 *ts) +{ + efi_time_t tm; + + if ((*efi.get_time)(&tm, NULL) != EFI_SUCCESS) { + memset(ts, 0, sizeof(*ts)); + return; + } + + ts->tv_sec = mktime64(tm.year, tm.month, tm.day, + tm.hour, tm.minute, tm.second); + ts->tv_nsec = tm.nanosecond; +} + +static int +is_memory_available (efi_memory_desc_t *md) +{ + if (!(md->attribute & EFI_MEMORY_WB)) + return 0; + + switch (md->type) { + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_BOOT_SERVICES_DATA: + case EFI_CONVENTIONAL_MEMORY: + return 1; + } + return 0; +} + +typedef struct kern_memdesc { + u64 attribute; + u64 start; + u64 num_pages; +} kern_memdesc_t; + +static kern_memdesc_t *kern_memmap; + +#define efi_md_size(md) (md->num_pages << EFI_PAGE_SHIFT) + +static inline u64 +kmd_end(kern_memdesc_t *kmd) +{ + return (kmd->start + (kmd->num_pages << EFI_PAGE_SHIFT)); +} + +static inline u64 +efi_md_end(efi_memory_desc_t *md) +{ + return (md->phys_addr + efi_md_size(md)); +} + +static inline int +efi_wb(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_WB); +} + +static inline int +efi_uc(efi_memory_desc_t *md) +{ + return (md->attribute & EFI_MEMORY_UC); +} + +static void +walk (efi_freemem_callback_t callback, void *arg, u64 attr) +{ + kern_memdesc_t *k; + u64 start, end, voff; + + voff = (attr == EFI_MEMORY_WB) ? PAGE_OFFSET : __IA64_UNCACHED_OFFSET; + for (k = kern_memmap; k->start != ~0UL; k++) { + if (k->attribute != attr) + continue; + start = PAGE_ALIGN(k->start); + end = (k->start + (k->num_pages << EFI_PAGE_SHIFT)) & PAGE_MASK; + if (start < end) + if ((*callback)(start + voff, end + voff, arg) < 0) + return; + } +} + +/* + * Walk the EFI memory map and call CALLBACK once for each EFI memory + * descriptor that has memory that is available for OS use. + */ +void +efi_memmap_walk (efi_freemem_callback_t callback, void *arg) +{ + walk(callback, arg, EFI_MEMORY_WB); +} + +/* + * Walk the EFI memory map and call CALLBACK once for each EFI memory + * descriptor that has memory that is available for uncached allocator. + */ +void +efi_memmap_walk_uc (efi_freemem_callback_t callback, void *arg) +{ + walk(callback, arg, EFI_MEMORY_UC); +} + +/* + * Look for the PAL_CODE region reported by EFI and map it using an + * ITR to enable safe PAL calls in virtual mode. See IA-64 Processor + * Abstraction Layer chapter 11 in ADAG + */ +void * +efi_get_pal_addr (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + int pal_code_count = 0; + u64 vaddr, mask; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type != EFI_PAL_CODE) + continue; + + if (++pal_code_count > 1) { + printk(KERN_ERR "Too many EFI Pal Code memory ranges, " + "dropped @ %llx\n", md->phys_addr); + continue; + } + /* + * The only ITLB entry in region 7 that is used is the one + * installed by __start(). That entry covers a 64MB range. + */ + mask = ~((1 << KERNEL_TR_PAGE_SHIFT) - 1); + vaddr = PAGE_OFFSET + md->phys_addr; + + /* + * We must check that the PAL mapping won't overlap with the + * kernel mapping. + * + * PAL code is guaranteed to be aligned on a power of 2 between + * 4k and 256KB and that only one ITR is needed to map it. This + * implies that the PAL code is always aligned on its size, + * i.e., the closest matching page size supported by the TLB. + * Therefore PAL code is guaranteed never to cross a 64MB unless + * it is bigger than 64MB (very unlikely!). So for now the + * following test is enough to determine whether or not we need + * a dedicated ITR for the PAL code. + */ + if ((vaddr & mask) == (KERNEL_START & mask)) { + printk(KERN_INFO "%s: no need to install ITR for PAL code\n", + __func__); + continue; + } + + if (efi_md_size(md) > IA64_GRANULE_SIZE) + panic("Whoa! PAL code size bigger than a granule!"); + +#if EFI_DEBUG + mask = ~((1 << IA64_GRANULE_SHIFT) - 1); + + printk(KERN_INFO "CPU %d: mapping PAL code " + "[0x%llx-0x%llx) into [0x%llx-0x%llx)\n", + smp_processor_id(), md->phys_addr, + md->phys_addr + efi_md_size(md), + vaddr & mask, (vaddr & mask) + IA64_GRANULE_SIZE); +#endif + return __va(md->phys_addr); + } + printk(KERN_WARNING "%s: no PAL-code memory-descriptor found\n", + __func__); + return NULL; +} + + +static u8 __init palo_checksum(u8 *buffer, u32 length) +{ + u8 sum = 0; + u8 *end = buffer + length; + + while (buffer < end) + sum = (u8) (sum + *(buffer++)); + + return sum; +} + +/* + * Parse and handle PALO table which is published at: + * http://www.dig64.org/home/DIG64_PALO_R1_0.pdf + */ +static void __init handle_palo(unsigned long phys_addr) +{ + struct palo_table *palo = __va(phys_addr); + u8 checksum; + + if (strncmp(palo->signature, PALO_SIG, sizeof(PALO_SIG) - 1)) { + printk(KERN_INFO "PALO signature incorrect.\n"); + return; + } + + checksum = palo_checksum((u8 *)palo, palo->length); + if (checksum) { + printk(KERN_INFO "PALO checksum incorrect.\n"); + return; + } + + setup_ptcg_sem(palo->max_tlb_purges, NPTCG_FROM_PALO); +} + +void +efi_map_pal_code (void) +{ + void *pal_vaddr = efi_get_pal_addr (); + u64 psr; + + if (!pal_vaddr) + return; + + /* + * Cannot write to CRx with PSR.ic=1 + */ + psr = ia64_clear_ic(); + ia64_itr(0x1, IA64_TR_PALCODE, + GRANULEROUNDDOWN((unsigned long) pal_vaddr), + pte_val(pfn_pte(__pa(pal_vaddr) >> PAGE_SHIFT, PAGE_KERNEL)), + IA64_GRANULE_SHIFT); + ia64_set_psr(psr); /* restore psr */ +} + +void __init +efi_init (void) +{ + const efi_system_table_t *efi_systab; + void *efi_map_start, *efi_map_end; + u64 efi_desc_size; + char *cp; + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * It's too early to be able to use the standard kernel command line + * support... + */ + for (cp = boot_command_line; *cp; ) { + if (memcmp(cp, "mem=", 4) == 0) { + mem_limit = memparse(cp + 4, &cp); + } else if (memcmp(cp, "max_addr=", 9) == 0) { + max_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else if (memcmp(cp, "min_addr=", 9) == 0) { + min_addr = GRANULEROUNDDOWN(memparse(cp + 9, &cp)); + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } + if (min_addr != 0UL) + printk(KERN_INFO "Ignoring memory below %lluMB\n", + min_addr >> 20); + if (max_addr != ~0UL) + printk(KERN_INFO "Ignoring memory above %lluMB\n", + max_addr >> 20); + + efi_systab = __va(ia64_boot_param->efi_systab); + + /* + * Verify the EFI Table + */ + if (efi_systab == NULL) + panic("Whoa! Can't find EFI system table.\n"); + if (efi_systab_check_header(&efi_systab->hdr)) + panic("Whoa! EFI system table signature incorrect\n"); + + efi_systab_report_header(&efi_systab->hdr, efi_systab->fw_vendor); + + palo_phys = EFI_INVALID_TABLE_ADDR; + + if (efi_config_parse_tables(__va(efi_systab->tables), + efi_systab->nr_tables, + arch_tables) != 0) + return; + + if (palo_phys != EFI_INVALID_TABLE_ADDR) + handle_palo(palo_phys); + + runtime = __va(efi_systab->runtime); + efi.get_time = phys_get_time; + efi.set_time = phys_set_time; + efi.get_wakeup_time = phys_get_wakeup_time; + efi.set_wakeup_time = phys_set_wakeup_time; + efi.get_variable = phys_get_variable; + efi.get_next_variable = phys_get_next_variable; + efi.set_variable = phys_set_variable; + efi.get_next_high_mono_count = phys_get_next_high_mono_count; + efi.reset_system = phys_reset_system; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + +#if EFI_DEBUG + /* print EFI memory map: */ + { + efi_memory_desc_t *md; + void *p; + unsigned int i; + + for (i = 0, p = efi_map_start; p < efi_map_end; + ++i, p += efi_desc_size) + { + const char *unit; + unsigned long size; + char buf[64]; + + md = p; + size = md->num_pages << EFI_PAGE_SHIFT; + + if ((size >> 40) > 0) { + size >>= 40; + unit = "TB"; + } else if ((size >> 30) > 0) { + size >>= 30; + unit = "GB"; + } else if ((size >> 20) > 0) { + size >>= 20; + unit = "MB"; + } else { + size >>= 10; + unit = "KB"; + } + + printk("mem%02d: %s " + "range=[0x%016llx-0x%016llx) (%4lu%s)\n", + i, efi_md_typeattr_format(buf, sizeof(buf), md), + md->phys_addr, + md->phys_addr + efi_md_size(md), size, unit); + } + } +#endif + + efi_map_pal_code(); + efi_enter_virtual_mode(); +} + +void +efi_enter_virtual_mode (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + efi_status_t status; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->attribute & EFI_MEMORY_RUNTIME) { + /* + * Some descriptors have multiple bits set, so the + * order of the tests is relevant. + */ + if (md->attribute & EFI_MEMORY_WB) { + md->virt_addr = (u64) __va(md->phys_addr); + } else if (md->attribute & EFI_MEMORY_UC) { + md->virt_addr = (u64) ioremap(md->phys_addr, 0); + } else if (md->attribute & EFI_MEMORY_WC) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, + (_PAGE_A | + _PAGE_P | + _PAGE_D | + _PAGE_MA_WC | + _PAGE_PL_0 | + _PAGE_AR_RW)); +#else + printk(KERN_INFO "EFI_MEMORY_WC mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } else if (md->attribute & EFI_MEMORY_WT) { +#if 0 + md->virt_addr = ia64_remap(md->phys_addr, + (_PAGE_A | + _PAGE_P | + _PAGE_D | + _PAGE_MA_WT | + _PAGE_PL_0 | + _PAGE_AR_RW)); +#else + printk(KERN_INFO "EFI_MEMORY_WT mapping\n"); + md->virt_addr = (u64) ioremap(md->phys_addr, 0); +#endif + } + } + } + + status = efi_call_phys(__va(runtime->set_virtual_address_map), + ia64_boot_param->efi_memmap_size, + efi_desc_size, + ia64_boot_param->efi_memdesc_version, + ia64_boot_param->efi_memmap); + if (status != EFI_SUCCESS) { + printk(KERN_WARNING "warning: unable to switch EFI into " + "virtual mode (status=%lu)\n", status); + return; + } + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + /* + * Now that EFI is in virtual mode, we call the EFI functions more + * efficiently: + */ + efi.get_time = virt_get_time; + efi.set_time = virt_set_time; + efi.get_wakeup_time = virt_get_wakeup_time; + efi.set_wakeup_time = virt_set_wakeup_time; + efi.get_variable = virt_get_variable; + efi.get_next_variable = virt_get_next_variable; + efi.set_variable = virt_set_variable; + efi.get_next_high_mono_count = virt_get_next_high_mono_count; + efi.reset_system = virt_reset_system; +} + +/* + * Walk the EFI memory map looking for the I/O port range. There can only be + * one entry of this type, other I/O port ranges should be described via ACPI. + */ +u64 +efi_get_iobase (void) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->type == EFI_MEMORY_MAPPED_IO_PORT_SPACE) { + if (md->attribute & EFI_MEMORY_UC) + return md->phys_addr; + } + } + return 0; +} + +static struct kern_memdesc * +kern_memory_descriptor (unsigned long phys_addr) +{ + struct kern_memdesc *md; + + for (md = kern_memmap; md->start != ~0UL; md++) { + if (phys_addr - md->start < (md->num_pages << EFI_PAGE_SHIFT)) + return md; + } + return NULL; +} + +static efi_memory_desc_t * +efi_memory_descriptor (unsigned long phys_addr) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (phys_addr - md->phys_addr < efi_md_size(md)) + return md; + } + return NULL; +} + +static int +efi_memmap_intersects (unsigned long phys_addr, unsigned long size) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + unsigned long end; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + end = phys_addr + size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (md->phys_addr < end && efi_md_end(md) > phys_addr) + return 1; + } + return 0; +} + +int +efi_mem_type (unsigned long phys_addr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + + if (md) + return md->type; + return -EINVAL; +} + +u64 +efi_mem_attributes (unsigned long phys_addr) +{ + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + + if (md) + return md->attribute; + return 0; +} +EXPORT_SYMBOL(efi_mem_attributes); + +u64 +efi_mem_attribute (unsigned long phys_addr, unsigned long size) +{ + unsigned long end = phys_addr + size; + efi_memory_desc_t *md = efi_memory_descriptor(phys_addr); + u64 attr; + + if (!md) + return 0; + + /* + * EFI_MEMORY_RUNTIME is not a memory attribute; it just tells + * the kernel that firmware needs this region mapped. + */ + attr = md->attribute & ~EFI_MEMORY_RUNTIME; + do { + unsigned long md_end = efi_md_end(md); + + if (end <= md_end) + return attr; + + md = efi_memory_descriptor(md_end); + if (!md || (md->attribute & ~EFI_MEMORY_RUNTIME) != attr) + return 0; + } while (md); + return 0; /* never reached */ +} + +u64 +kern_mem_attribute (unsigned long phys_addr, unsigned long size) +{ + unsigned long end = phys_addr + size; + struct kern_memdesc *md; + u64 attr; + + /* + * This is a hack for ioremap calls before we set up kern_memmap. + * Maybe we should do efi_memmap_init() earlier instead. + */ + if (!kern_memmap) { + attr = efi_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return EFI_MEMORY_WB; + return 0; + } + + md = kern_memory_descriptor(phys_addr); + if (!md) + return 0; + + attr = md->attribute; + do { + unsigned long md_end = kmd_end(md); + + if (end <= md_end) + return attr; + + md = kern_memory_descriptor(md_end); + if (!md || md->attribute != attr) + return 0; + } while (md); + return 0; /* never reached */ +} + +int +valid_phys_addr_range (phys_addr_t phys_addr, unsigned long size) +{ + u64 attr; + + /* + * /dev/mem reads and writes use copy_to_user(), which implicitly + * uses a granule-sized kernel identity mapping. It's really + * only safe to do this for regions in kern_memmap. For more + * details, see Documentation/arch/ia64/aliasing.rst. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + return 0; +} + +int +valid_mmap_phys_addr_range (unsigned long pfn, unsigned long size) +{ + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; + + attr = efi_mem_attribute(phys_addr, size); + + /* + * /dev/mem mmap uses normal user pages, so we don't need the entire + * granule, but the entire region we're mapping must support the same + * attribute. + */ + if (attr & EFI_MEMORY_WB || attr & EFI_MEMORY_UC) + return 1; + + /* + * Intel firmware doesn't tell us about all the MMIO regions, so + * in general we have to allow mmap requests. But if EFI *does* + * tell us about anything inside this region, we should deny it. + * The user can always map a smaller region to avoid the overlap. + */ + if (efi_memmap_intersects(phys_addr, size)) + return 0; + + return 1; +} + +pgprot_t +phys_mem_access_prot(struct file *file, unsigned long pfn, unsigned long size, + pgprot_t vma_prot) +{ + unsigned long phys_addr = pfn << PAGE_SHIFT; + u64 attr; + + /* + * For /dev/mem mmap, we use user mappings, but if the region is + * in kern_memmap (and hence may be covered by a kernel mapping), + * we must use the same attribute as the kernel mapping. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + else if (attr & EFI_MEMORY_UC) + return pgprot_noncached(vma_prot); + + /* + * Some chipsets don't support UC access to memory. If + * WB is supported, we prefer that. + */ + if (efi_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) + return pgprot_cacheable(vma_prot); + + return pgprot_noncached(vma_prot); +} + +int __init +efi_uart_console_only(void) +{ + efi_status_t status; + char *s, name[] = "ConOut"; + efi_guid_t guid = EFI_GLOBAL_VARIABLE_GUID; + efi_char16_t *utf16, name_utf16[32]; + unsigned char data[1024]; + unsigned long size = sizeof(data); + struct efi_generic_dev_path *hdr, *end_addr; + int uart = 0; + + /* Convert to UTF-16 */ + utf16 = name_utf16; + s = name; + while (*s) + *utf16++ = *s++ & 0x7f; + *utf16 = 0; + + status = efi.get_variable(name_utf16, &guid, NULL, &size, data); + if (status != EFI_SUCCESS) { + printk(KERN_ERR "No EFI %s variable?\n", name); + return 0; + } + + hdr = (struct efi_generic_dev_path *) data; + end_addr = (struct efi_generic_dev_path *) ((u8 *) data + size); + while (hdr < end_addr) { + if (hdr->type == EFI_DEV_MSG && + hdr->sub_type == EFI_DEV_MSG_UART) + uart = 1; + else if (hdr->type == EFI_DEV_END_PATH || + hdr->type == EFI_DEV_END_PATH2) { + if (!uart) + return 0; + if (hdr->sub_type == EFI_DEV_END_ENTIRE) + return 1; + uart = 0; + } + hdr = (struct efi_generic_dev_path *)((u8 *) hdr + hdr->length); + } + printk(KERN_ERR "Malformed %s value\n", name); + return 0; +} + +/* + * Look for the first granule aligned memory descriptor memory + * that is big enough to hold EFI memory map. Make sure this + * descriptor is at least granule sized so it does not get trimmed + */ +struct kern_memdesc * +find_memmap_space (void) +{ + u64 contig_low=0, contig_high=0; + u64 as = 0, ae; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *pmd = NULL, *check_md; + u64 space_needed, efi_desc_size; + unsigned long total_mem = 0; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + /* + * Worst case: we need 3 kernel descriptors for each efi descriptor + * (if every entry has a WB part in the middle, and UC head and tail), + * plus one for the end marker. + */ + space_needed = sizeof(kern_memdesc_t) * + (3 * (ia64_boot_param->efi_memmap_size/efi_desc_size) + 1); + + for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { + md = p; + if (!efi_wb(md)) { + continue; + } + if (pmd == NULL || !efi_wb(pmd) || + efi_md_end(pmd) != md->phys_addr) { + contig_low = GRANULEROUNDUP(md->phys_addr); + contig_high = efi_md_end(md); + for (q = p + efi_desc_size; q < efi_map_end; + q += efi_desc_size) { + check_md = q; + if (!efi_wb(check_md)) + break; + if (contig_high != check_md->phys_addr) + break; + contig_high = efi_md_end(check_md); + } + contig_high = GRANULEROUNDDOWN(contig_high); + } + if (!is_memory_available(md) || md->type == EFI_LOADER_DATA) + continue; + + /* Round ends inward to granule boundaries */ + as = max(contig_low, md->phys_addr); + ae = min(contig_high, efi_md_end(md)); + + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); + ae = min(ae, max_addr); + if (ae <= as) + continue; + + /* avoid going over mem= command line arg */ + if (total_mem + (ae - as) > mem_limit) + ae -= total_mem + (ae - as) - mem_limit; + + if (ae <= as) + continue; + + if (ae - as > space_needed) + break; + } + if (p >= efi_map_end) + panic("Can't allocate space for kernel memory descriptors"); + + return __va(as); +} + +/* + * Walk the EFI memory map and gather all memory available for kernel + * to use. We can allocate partial granules only if the unavailable + * parts exist, and are WB. + */ +unsigned long +efi_memmap_init(u64 *s, u64 *e) +{ + struct kern_memdesc *k, *prev = NULL; + u64 contig_low=0, contig_high=0; + u64 as, ae, lim; + void *efi_map_start, *efi_map_end, *p, *q; + efi_memory_desc_t *md, *pmd = NULL, *check_md; + u64 efi_desc_size; + unsigned long total_mem = 0; + + k = kern_memmap = find_memmap_space(); + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; pmd = md, p += efi_desc_size) { + md = p; + if (!efi_wb(md)) { + if (efi_uc(md) && + (md->type == EFI_CONVENTIONAL_MEMORY || + md->type == EFI_BOOT_SERVICES_DATA)) { + k->attribute = EFI_MEMORY_UC; + k->start = md->phys_addr; + k->num_pages = md->num_pages; + k++; + } + continue; + } + if (pmd == NULL || !efi_wb(pmd) || + efi_md_end(pmd) != md->phys_addr) { + contig_low = GRANULEROUNDUP(md->phys_addr); + contig_high = efi_md_end(md); + for (q = p + efi_desc_size; q < efi_map_end; + q += efi_desc_size) { + check_md = q; + if (!efi_wb(check_md)) + break; + if (contig_high != check_md->phys_addr) + break; + contig_high = efi_md_end(check_md); + } + contig_high = GRANULEROUNDDOWN(contig_high); + } + if (!is_memory_available(md)) + continue; + + /* + * Round ends inward to granule boundaries + * Give trimmings to uncached allocator + */ + if (md->phys_addr < contig_low) { + lim = min(efi_md_end(md), contig_low); + if (efi_uc(md)) { + if (k > kern_memmap && + (k-1)->attribute == EFI_MEMORY_UC && + kmd_end(k-1) == md->phys_addr) { + (k-1)->num_pages += + (lim - md->phys_addr) + >> EFI_PAGE_SHIFT; + } else { + k->attribute = EFI_MEMORY_UC; + k->start = md->phys_addr; + k->num_pages = (lim - md->phys_addr) + >> EFI_PAGE_SHIFT; + k++; + } + } + as = contig_low; + } else + as = md->phys_addr; + + if (efi_md_end(md) > contig_high) { + lim = max(md->phys_addr, contig_high); + if (efi_uc(md)) { + if (lim == md->phys_addr && k > kern_memmap && + (k-1)->attribute == EFI_MEMORY_UC && + kmd_end(k-1) == md->phys_addr) { + (k-1)->num_pages += md->num_pages; + } else { + k->attribute = EFI_MEMORY_UC; + k->start = lim; + k->num_pages = (efi_md_end(md) - lim) + >> EFI_PAGE_SHIFT; + k++; + } + } + ae = contig_high; + } else + ae = efi_md_end(md); + + /* keep within max_addr= and min_addr= command line arg */ + as = max(as, min_addr); + ae = min(ae, max_addr); + if (ae <= as) + continue; + + /* avoid going over mem= command line arg */ + if (total_mem + (ae - as) > mem_limit) + ae -= total_mem + (ae - as) - mem_limit; + + if (ae <= as) + continue; + if (prev && kmd_end(prev) == md->phys_addr) { + prev->num_pages += (ae - as) >> EFI_PAGE_SHIFT; + total_mem += ae - as; + continue; + } + k->attribute = EFI_MEMORY_WB; + k->start = as; + k->num_pages = (ae - as) >> EFI_PAGE_SHIFT; + total_mem += ae - as; + prev = k++; + } + k->start = ~0L; /* end-marker */ + + /* reserve the memory we are using for kern_memmap */ + *s = (u64)kern_memmap; + *e = (u64)++k; + + return total_mem; +} + +void +efi_initialize_iomem_resources(struct resource *code_resource, + struct resource *data_resource, + struct resource *bss_resource) +{ + struct resource *res; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + char *name; + unsigned long flags, desc; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + res = NULL; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + + if (md->num_pages == 0) /* should not happen */ + continue; + + flags = IORESOURCE_MEM | IORESOURCE_BUSY; + desc = IORES_DESC_NONE; + + switch (md->type) { + + case EFI_MEMORY_MAPPED_IO: + case EFI_MEMORY_MAPPED_IO_PORT_SPACE: + continue; + + case EFI_LOADER_CODE: + case EFI_LOADER_DATA: + case EFI_BOOT_SERVICES_DATA: + case EFI_BOOT_SERVICES_CODE: + case EFI_CONVENTIONAL_MEMORY: + if (md->attribute & EFI_MEMORY_WP) { + name = "System ROM"; + flags |= IORESOURCE_READONLY; + } else if (md->attribute == EFI_MEMORY_UC) { + name = "Uncached RAM"; + } else { + name = "System RAM"; + flags |= IORESOURCE_SYSRAM; + } + break; + + case EFI_ACPI_MEMORY_NVS: + name = "ACPI Non-volatile Storage"; + desc = IORES_DESC_ACPI_NV_STORAGE; + break; + + case EFI_UNUSABLE_MEMORY: + name = "reserved"; + flags |= IORESOURCE_DISABLED; + break; + + case EFI_PERSISTENT_MEMORY: + name = "Persistent Memory"; + desc = IORES_DESC_PERSISTENT_MEMORY; + break; + + case EFI_RESERVED_TYPE: + case EFI_RUNTIME_SERVICES_CODE: + case EFI_RUNTIME_SERVICES_DATA: + case EFI_ACPI_RECLAIM_MEMORY: + default: + name = "reserved"; + break; + } + + if ((res = kzalloc(sizeof(struct resource), + GFP_KERNEL)) == NULL) { + printk(KERN_ERR + "failed to allocate resource for iomem\n"); + return; + } + + res->name = name; + res->start = md->phys_addr; + res->end = md->phys_addr + efi_md_size(md) - 1; + res->flags = flags; + res->desc = desc; + + if (insert_resource(&iomem_resource, res) < 0) + kfree(res); + else { + /* + * We don't know which region contains + * kernel data so we try it repeatedly and + * let the resource manager test it. + */ + insert_resource(res, code_resource); + insert_resource(res, data_resource); + insert_resource(res, bss_resource); +#ifdef CONFIG_KEXEC + insert_resource(res, &efi_memmap_res); + insert_resource(res, &boot_param_res); + if (crashk_res.end > crashk_res.start) + insert_resource(res, &crashk_res); +#endif + } + } +} + +#ifdef CONFIG_KEXEC +/* find a block of memory aligned to 64M exclude reserved regions + rsvd_regions are sorted + */ +unsigned long __init +kdump_find_rsvd_region (unsigned long size, struct rsvd_region *r, int n) +{ + int i; + u64 start, end; + u64 alignment = 1UL << _PAGE_SIZE_64M; + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (!efi_wb(md)) + continue; + start = ALIGN(md->phys_addr, alignment); + end = efi_md_end(md); + for (i = 0; i < n; i++) { + if (__pa(r[i].start) >= start && __pa(r[i].end) < end) { + if (__pa(r[i].start) > start + size) + return start; + start = ALIGN(__pa(r[i].end), alignment); + if (i < n-1 && + __pa(r[i+1].start) < start + size) + continue; + else + break; + } + } + if (end > start + size) + return start; + } + + printk(KERN_WARNING + "Cannot reserve 0x%lx byte of memory for crashdump\n", size); + return ~0UL; +} +#endif + +#ifdef CONFIG_CRASH_DUMP +/* locate the size find a the descriptor at a certain address */ +unsigned long __init +vmcore_find_descriptor_size (unsigned long address) +{ + void *efi_map_start, *efi_map_end, *p; + efi_memory_desc_t *md; + u64 efi_desc_size; + unsigned long ret = 0; + + efi_map_start = __va(ia64_boot_param->efi_memmap); + efi_map_end = efi_map_start + ia64_boot_param->efi_memmap_size; + efi_desc_size = ia64_boot_param->efi_memdesc_size; + + for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) { + md = p; + if (efi_wb(md) && md->type == EFI_LOADER_DATA + && md->phys_addr == address) { + ret = efi_md_size(md); + break; + } + } + + if (ret == 0) + printk(KERN_WARNING "Cannot locate EFI vmcore descriptor\n"); + + return ret; +} +#endif + +char *efi_systab_show_arch(char *str) +{ + if (mps_phys != EFI_INVALID_TABLE_ADDR) + str += sprintf(str, "MPS=0x%lx\n", mps_phys); + if (hcdp_phys != EFI_INVALID_TABLE_ADDR) + str += sprintf(str, "HCDP=0x%lx\n", hcdp_phys); + return str; +} diff --git a/arch/ia64/kernel/efi_stub.S b/arch/ia64/kernel/efi_stub.S new file mode 100644 index 000000000000..1fd61b78fb29 --- /dev/null +++ b/arch/ia64/kernel/efi_stub.S @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * EFI call stub. + * + * Copyright (C) 1999-2001 Hewlett-Packard Co + * David Mosberger + * + * This stub allows us to make EFI calls in physical mode with interrupts + * turned off. We need this because we can't call SetVirtualMap() until + * the kernel has booted far enough to allow allocation of struct vm_area_struct + * entries (which we would need to map stuff with memory attributes other + * than uncached or writeback...). Since the GetTime() service gets called + * earlier than that, we need to be able to make physical mode EFI calls from + * the kernel. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include +#include + +/* + * Inputs: + * in0 = address of function descriptor of EFI routine to call + * in1..in7 = arguments to routine + * + * Outputs: + * r8 = EFI_STATUS returned by called function + */ + +GLOBAL_ENTRY(efi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,7,7,0 + ld8 r2=[in0],8 // load EFI function's entry point + mov loc0=rp + .body + ;; + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load EFI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov out4=in5 + mov out0=in1 + mov out1=in2 + mov out2=in3 + mov out3=in4 + mov out5=in6 + mov out6=in7 + mov loc5=r19 + mov loc6=r20 + br.call.sptk.many rp=b6 // call the EFI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(efi_call_phys) diff --git a/arch/ia64/kernel/elfcore.c b/arch/ia64/kernel/elfcore.c new file mode 100644 index 000000000000..8895df121540 --- /dev/null +++ b/arch/ia64/kernel/elfcore.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +#include + + +Elf64_Half elf_core_extra_phdrs(struct coredump_params *cprm) +{ + return GATE_EHDR->e_phnum; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + Elf64_Off ofs = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + struct elf_phdr phdr = gate_phdrs[i]; + + if (phdr.p_type == PT_LOAD) { + phdr.p_memsz = PAGE_ALIGN(phdr.p_memsz); + phdr.p_filesz = phdr.p_memsz; + if (ofs == 0) { + ofs = phdr.p_offset = offset; + offset += phdr.p_filesz; + } else { + phdr.p_offset = ofs; + } + } else { + phdr.p_offset += ofs; + } + phdr.p_paddr = 0; /* match other core phdrs */ + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + return 1; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + void *addr = (void *)gate_phdrs[i].p_vaddr; + size_t memsz = PAGE_ALIGN(gate_phdrs[i].p_memsz); + + if (!dump_emit(cprm, addr, memsz)) + return 0; + break; + } + } + return 1; +} + +size_t elf_core_extra_data_size(struct coredump_params *cprm) +{ + const struct elf_phdr *const gate_phdrs = + (const struct elf_phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + size_t size = 0; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i) { + if (gate_phdrs[i].p_type == PT_LOAD) { + size += PAGE_ALIGN(gate_phdrs[i].p_memsz); + break; + } + } + return size; +} diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S new file mode 100644 index 000000000000..ac06d44b9b27 --- /dev/null +++ b/arch/ia64/kernel/entry.S @@ -0,0 +1,1427 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/ia64/kernel/entry.S + * + * Kernel entry points. + * + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999, 2002-2003 + * Asit Mallick + * Don Dugger + * Suresh Siddha + * Fenghua Yu + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + */ +/* + * ia64_switch_to now places correct virtual mapping in in TR2 for + * kernel stack. This allows us to handle interrupts without changing + * to physical mode. + * + * Jonathan Nicklin + * Patrick O'Rourke + * 11/07/2000 + */ +/* + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + */ +/* + * Global (preserved) predicate usage on syscall entry/exit path: + * + * pKStk: See entry.h. + * pUStk: See entry.h. + * pSys: See entry.h. + * pNonSys: !pSys + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "minstate.h" + + /* + * execve() is special because in case of success, we need to + * setup a null register window frame. + */ +ENTRY(ia64_execve) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 + mov loc0=rp + .body + mov out0=in0 // filename + ;; // stop bit between alloc and call + mov out1=in1 // argv + mov out2=in2 // envp + br.call.sptk.many rp=sys_execve +.ret0: + cmp4.ge p6,p7=r8,r0 + mov ar.pfs=loc1 // restore ar.pfs + sxt4 r8=r8 // return 64-bit result + ;; + stf.spill [sp]=f0 + mov rp=loc0 +(p6) mov ar.pfs=r0 // clear ar.pfs on success +(p7) br.ret.sptk.many rp + + /* + * In theory, we'd have to zap this state only to prevent leaking of + * security sensitive state (e.g., if current->mm->dumpable is zero). However, + * this executes in less than 20 cycles even on Itanium, so it's not worth + * optimizing for...). + */ + mov ar.unat=0; mov ar.lc=0 + mov r4=0; mov f2=f0; mov b1=r0 + mov r5=0; mov f3=f0; mov b2=r0 + mov r6=0; mov f4=f0; mov b3=r0 + mov r7=0; mov f5=f0; mov b4=r0 + ldf.fill f12=[sp]; mov f13=f0; mov b5=r0 + ldf.fill f14=[sp]; ldf.fill f15=[sp]; mov f16=f0 + ldf.fill f17=[sp]; ldf.fill f18=[sp]; mov f19=f0 + ldf.fill f20=[sp]; ldf.fill f21=[sp]; mov f22=f0 + ldf.fill f23=[sp]; ldf.fill f24=[sp]; mov f25=f0 + ldf.fill f26=[sp]; ldf.fill f27=[sp]; mov f28=f0 + ldf.fill f29=[sp]; ldf.fill f30=[sp]; mov f31=f0 + br.ret.sptk.many rp +END(ia64_execve) + +/* + * sys_clone2(u64 flags, u64 ustack_base, u64 ustack_size, u64 parent_tidptr, u64 child_tidptr, + * u64 tls) + */ +GLOBAL_ENTRY(sys_clone2) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + mov loc0=rp + mov loc1=r16 // save ar.pfs across ia64_clone + .body + mov out0=in0 + mov out1=in1 + mov out2=in2 + mov out3=in3 + mov out4=in4 + mov out5=in5 + br.call.sptk.many rp=ia64_clone +.ret1: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys_clone2) + +/* + * sys_clone(u64 flags, u64 ustack_base, u64 parent_tidptr, u64 child_tidptr, u64 tls) + * Deprecated. Use sys_clone2() instead. + */ +GLOBAL_ENTRY(sys_clone) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc r16=ar.pfs,8,2,6,0 + DO_SAVE_SWITCH_STACK + mov loc0=rp + mov loc1=r16 // save ar.pfs across ia64_clone + .body + mov out0=in0 + mov out1=in1 + mov out2=16 // stacksize (compensates for 16-byte scratch area) + mov out3=in3 + mov out4=in4 + mov out5=in5 + br.call.sptk.many rp=ia64_clone +.ret2: .restore sp + adds sp=IA64_SWITCH_STACK_SIZE,sp // pop the switch stack + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(sys_clone) + +/* + * prev_task <- ia64_switch_to(struct task_struct *next) + * With Ingo's new scheduler, interrupts are disabled when this routine gets + * called. The code starting at .map relies on this. The rest of the code + * doesn't care about the interrupt masking status. + */ +GLOBAL_ENTRY(ia64_switch_to) + .prologue + alloc r16=ar.pfs,1,0,0,0 + DO_SAVE_SWITCH_STACK + .body + + adds r22=IA64_TASK_THREAD_KSP_OFFSET,r13 + movl r25=init_task + mov r27=IA64_KR(CURRENT_STACK) + adds r21=IA64_TASK_THREAD_KSP_OFFSET,in0 + dep r20=0,in0,61,3 // physical address of "next" + ;; + st8 [r22]=sp // save kernel stack pointer of old task + shr.u r26=r20,IA64_GRANULE_SHIFT + cmp.eq p7,p6=r25,in0 + ;; + /* + * If we've already mapped this task's page, we can skip doing it again. + */ +(p6) cmp.eq p7,p6=r26,r27 +(p6) br.cond.dpnt .map + ;; +.done: + ld8 sp=[r21] // load kernel stack pointer of new task + MOV_TO_KR(CURRENT, in0, r8, r9) // update "current" application register + mov r8=r13 // return pointer to previously running task + mov r13=in0 // set "current" pointer + ;; + DO_LOAD_SWITCH_STACK + +#ifdef CONFIG_SMP + sync.i // ensure "fc"s done by this CPU are visible on other CPUs +#endif + br.ret.sptk.many rp // boogie on out in new context + +.map: + RSM_PSR_IC(r25) // interrupts (psr.i) are already disabled here + movl r25=PAGE_KERNEL + ;; + srlz.d + or r23=r25,r20 // construct PA | page properties + mov r25=IA64_GRANULE_SHIFT<<2 + ;; + MOV_TO_ITIR(p0, r25, r8) + MOV_TO_IFA(in0, r8) // VA of next task... + ;; + mov r25=IA64_TR_CURRENT_STACK + MOV_TO_KR(CURRENT_STACK, r26, r8, r9) // remember last page we mapped... + ;; + itr.d dtr[r25]=r23 // wire in new mapping... + SSM_PSR_IC_AND_SRLZ_D(r8, r9) // reenable the psr.ic bit + br.cond.sptk .done +END(ia64_switch_to) + +/* + * Note that interrupts are enabled during save_switch_stack and load_switch_stack. This + * means that we may get an interrupt with "sp" pointing to the new kernel stack while + * ar.bspstore is still pointing to the old kernel backing store area. Since ar.rsc, + * ar.rnat, ar.bsp, and ar.bspstore are all preserved by interrupts, this is not a + * problem. Also, we don't need to specify unwind information for preserved registers + * that are not modified in save_switch_stack as the right unwind information is already + * specified at the call-site of save_switch_stack. + */ + +/* + * save_switch_stack: + * - r16 holds ar.pfs + * - b7 holds address to return to + * - rp (b0) holds return address to save + */ +GLOBAL_ENTRY(save_switch_stack) + .prologue + .altrp b7 + flushrs // flush dirty regs to backing store (must be first in insn group) + .save @priunat,r17 + mov r17=ar.unat // preserve caller's + .body +#ifdef CONFIG_ITANIUM + adds r2=16+128,sp + adds r3=16+64,sp + adds r14=SW(R4)+16,sp + ;; + st8.spill [r14]=r4,16 // spill r4 + lfetch.fault.excl.nt1 [r3],128 + ;; + lfetch.fault.excl.nt1 [r2],128 + lfetch.fault.excl.nt1 [r3],128 + ;; + lfetch.fault.excl [r2] + lfetch.fault.excl [r3] + adds r15=SW(R5)+16,sp +#else + add r2=16+3*128,sp + add r3=16,sp + add r14=SW(R4)+16,sp + ;; + st8.spill [r14]=r4,SW(R6)-SW(R4) // spill r4 and prefetch offset 0x1c0 + lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x010 + ;; + lfetch.fault.excl.nt1 [r3],128 // prefetch offset 0x090 + lfetch.fault.excl.nt1 [r2],128 // prefetch offset 0x190 + ;; + lfetch.fault.excl.nt1 [r3] // prefetch offset 0x110 + lfetch.fault.excl.nt1 [r2] // prefetch offset 0x210 + adds r15=SW(R5)+16,sp +#endif + ;; + st8.spill [r15]=r5,SW(R7)-SW(R5) // spill r5 + mov.m ar.rsc=0 // put RSE in mode: enforced lazy, little endian, pl 0 + add r2=SW(F2)+16,sp // r2 = &sw->f2 + ;; + st8.spill [r14]=r6,SW(B0)-SW(R6) // spill r6 + mov.m r18=ar.fpsr // preserve fpsr + add r3=SW(F3)+16,sp // r3 = &sw->f3 + ;; + stf.spill [r2]=f2,32 + mov.m r19=ar.rnat + mov r21=b0 + + stf.spill [r3]=f3,32 + st8.spill [r15]=r7,SW(B2)-SW(R7) // spill r7 + mov r22=b1 + ;; + // since we're done with the spills, read and save ar.unat: + mov.m r29=ar.unat + mov.m r20=ar.bspstore + mov r23=b2 + stf.spill [r2]=f4,32 + stf.spill [r3]=f5,32 + mov r24=b3 + ;; + st8 [r14]=r21,SW(B1)-SW(B0) // save b0 + st8 [r15]=r23,SW(B3)-SW(B2) // save b2 + mov r25=b4 + mov r26=b5 + ;; + st8 [r14]=r22,SW(B4)-SW(B1) // save b1 + st8 [r15]=r24,SW(AR_PFS)-SW(B3) // save b3 + mov r21=ar.lc // I-unit + stf.spill [r2]=f12,32 + stf.spill [r3]=f13,32 + ;; + st8 [r14]=r25,SW(B5)-SW(B4) // save b4 + st8 [r15]=r16,SW(AR_LC)-SW(AR_PFS) // save ar.pfs + stf.spill [r2]=f14,32 + stf.spill [r3]=f15,32 + ;; + st8 [r14]=r26 // save b5 + st8 [r15]=r21 // save ar.lc + stf.spill [r2]=f16,32 + stf.spill [r3]=f17,32 + ;; + stf.spill [r2]=f18,32 + stf.spill [r3]=f19,32 + ;; + stf.spill [r2]=f20,32 + stf.spill [r3]=f21,32 + ;; + stf.spill [r2]=f22,32 + stf.spill [r3]=f23,32 + ;; + stf.spill [r2]=f24,32 + stf.spill [r3]=f25,32 + ;; + stf.spill [r2]=f26,32 + stf.spill [r3]=f27,32 + ;; + stf.spill [r2]=f28,32 + stf.spill [r3]=f29,32 + ;; + stf.spill [r2]=f30,SW(AR_UNAT)-SW(F30) + stf.spill [r3]=f31,SW(PR)-SW(F31) + add r14=SW(CALLER_UNAT)+16,sp + ;; + st8 [r2]=r29,SW(AR_RNAT)-SW(AR_UNAT) // save ar.unat + st8 [r14]=r17,SW(AR_FPSR)-SW(CALLER_UNAT) // save caller_unat + mov r21=pr + ;; + st8 [r2]=r19,SW(AR_BSPSTORE)-SW(AR_RNAT) // save ar.rnat + st8 [r3]=r21 // save predicate registers + ;; + st8 [r2]=r20 // save ar.bspstore + st8 [r14]=r18 // save fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.many b7 +END(save_switch_stack) + +/* + * load_switch_stack: + * - "invala" MUST be done at call site (normally in DO_LOAD_SWITCH_STACK) + * - b7 holds address to return to + * - must not touch r8-r11 + */ +GLOBAL_ENTRY(load_switch_stack) + .prologue + .altrp b7 + + .body + lfetch.fault.nt1 [sp] + adds r2=SW(AR_BSPSTORE)+16,sp + adds r3=SW(AR_UNAT)+16,sp + mov ar.rsc=0 // put RSE into enforced lazy mode + adds r14=SW(CALLER_UNAT)+16,sp + adds r15=SW(AR_FPSR)+16,sp + ;; + ld8 r27=[r2],(SW(B0)-SW(AR_BSPSTORE)) // bspstore + ld8 r29=[r3],(SW(B1)-SW(AR_UNAT)) // unat + ;; + ld8 r21=[r2],16 // restore b0 + ld8 r22=[r3],16 // restore b1 + ;; + ld8 r23=[r2],16 // restore b2 + ld8 r24=[r3],16 // restore b3 + ;; + ld8 r25=[r2],16 // restore b4 + ld8 r26=[r3],16 // restore b5 + ;; + ld8 r16=[r2],(SW(PR)-SW(AR_PFS)) // ar.pfs + ld8 r17=[r3],(SW(AR_RNAT)-SW(AR_LC)) // ar.lc + ;; + ld8 r28=[r2] // restore pr + ld8 r30=[r3] // restore rnat + ;; + ld8 r18=[r14],16 // restore caller's unat + ld8 r19=[r15],24 // restore fpsr + ;; + ldf.fill f2=[r14],32 + ldf.fill f3=[r15],32 + ;; + ldf.fill f4=[r14],32 + ldf.fill f5=[r15],32 + ;; + ldf.fill f12=[r14],32 + ldf.fill f13=[r15],32 + ;; + ldf.fill f14=[r14],32 + ldf.fill f15=[r15],32 + ;; + ldf.fill f16=[r14],32 + ldf.fill f17=[r15],32 + ;; + ldf.fill f18=[r14],32 + ldf.fill f19=[r15],32 + mov b0=r21 + ;; + ldf.fill f20=[r14],32 + ldf.fill f21=[r15],32 + mov b1=r22 + ;; + ldf.fill f22=[r14],32 + ldf.fill f23=[r15],32 + mov b2=r23 + ;; + mov ar.bspstore=r27 + mov ar.unat=r29 // establish unat holding the NaT bits for r4-r7 + mov b3=r24 + ;; + ldf.fill f24=[r14],32 + ldf.fill f25=[r15],32 + mov b4=r25 + ;; + ldf.fill f26=[r14],32 + ldf.fill f27=[r15],32 + mov b5=r26 + ;; + ldf.fill f28=[r14],32 + ldf.fill f29=[r15],32 + mov ar.pfs=r16 + ;; + ldf.fill f30=[r14],32 + ldf.fill f31=[r15],24 + mov ar.lc=r17 + ;; + ld8.fill r4=[r14],16 + ld8.fill r5=[r15],16 + mov pr=r28,-1 + ;; + ld8.fill r6=[r14],16 + ld8.fill r7=[r15],16 + + mov ar.unat=r18 // restore caller's unat + mov ar.rnat=r30 // must restore after bspstore but before rsc! + mov ar.fpsr=r19 // restore fpsr + mov ar.rsc=3 // put RSE back into eager mode, pl 0 + br.cond.sptk.many b7 +END(load_switch_stack) + + /* + * Invoke a system call, but do some tracing before and after the call. + * We MUST preserve the current register frame throughout this routine + * because some system calls (such as ia64_execve) directly + * manipulate ar.pfs. + */ +GLOBAL_ENTRY(ia64_trace_syscall) + PT_REGS_UNWIND_INFO(0) + /* + * We need to preserve the scratch registers f6-f11 in case the system + * call is sigreturn. + */ + adds r16=PT(F6)+16,sp + adds r17=PT(F7)+16,sp + ;; + stf.spill [r16]=f6,32 + stf.spill [r17]=f7,32 + ;; + stf.spill [r16]=f8,32 + stf.spill [r17]=f9,32 + ;; + stf.spill [r16]=f10 + stf.spill [r17]=f11 + br.call.sptk.many rp=syscall_trace_enter // give parent a chance to catch syscall args + cmp.lt p6,p0=r8,r0 // check tracehook + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> + adds r16=PT(F6)+16,sp + adds r17=PT(F7)+16,sp + ;; + ldf.fill f6=[r16],32 + ldf.fill f7=[r17],32 + ;; + ldf.fill f8=[r16],32 + ldf.fill f9=[r17],32 + ;; + ldf.fill f10=[r16] + ldf.fill f11=[r17] + // the syscall number may have changed, so re-load it and re-calculate the + // syscall entry-point: + adds r15=PT(R15)+16,sp // r15 = &pt_regs.r15 (syscall #) + ;; + ld8 r15=[r15] + mov r3=NR_syscalls - 1 + ;; + adds r15=-1024,r15 + movl r16=sys_call_table + ;; + shladd r20=r15,3,r16 // r20 = sys_call_table + 8*(syscall-1024) + cmp.leu p6,p7=r15,r3 + ;; +(p6) ld8 r20=[r20] // load address of syscall entry point +(p7) movl r20=sys_ni_syscall + ;; + mov b6=r20 + br.call.sptk.many rp=b6 // do the syscall +.strace_check_retval: + cmp.lt p6,p0=r8,r0 // syscall failed? + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + adds r3=PT(R10)+16,sp // r3 = &pt_regs.r10 + mov r10=0 +(p6) br.cond.sptk strace_error // syscall failed -> + ;; // avoid RAW on r10 +.strace_save_retval: +.mem.offset 0,0; st8.spill [r2]=r8 // store return value in slot for r8 +.mem.offset 8,0; st8.spill [r3]=r10 // clear error indication in slot for r10 + br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value +.ret3: +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +(pUStk) rsm psr.i // disable interrupts + br.cond.sptk ia64_work_pending_syscall_end + +strace_error: + ld8 r3=[r2] // load pt_regs.r8 + sub r9=0,r8 // negate return value to get errno value + ;; + cmp.ne p6,p0=r3,r0 // is pt_regs.r8!=0? + adds r3=16,r2 // r3=&pt_regs.r10 + ;; +(p6) mov r10=-1 +(p6) mov r8=r9 + br.cond.sptk .strace_save_retval +END(ia64_trace_syscall) + + /* + * When traced and returning from sigreturn, we invoke syscall_trace but then + * go straight to ia64_leave_kernel rather than ia64_leave_syscall. + */ +GLOBAL_ENTRY(ia64_strace_leave_kernel) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + br.call.sptk.many rp=syscall_trace_leave // give parent a chance to catch return value +} +.ret4: br.cond.sptk ia64_leave_kernel +END(ia64_strace_leave_kernel) + +ENTRY(call_payload) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(0) + /* call the kernel_thread payload; fn is in r4, arg - in r5 */ + alloc loc1=ar.pfs,0,3,1,0 + mov loc0=rp + mov loc2=gp + mov out0=r5 // arg + ld8 r14 = [r4], 8 // fn.address + ;; + mov b6 = r14 + ld8 gp = [r4] // fn.gp + ;; + br.call.sptk.many rp=b6 // fn(arg) +.ret12: mov gp=loc2 + mov rp=loc0 + mov ar.pfs=loc1 + /* ... and if it has returned, we are going to userland */ + cmp.ne pKStk,pUStk=r0,r0 + br.ret.sptk.many rp +END(call_payload) + +GLOBAL_ENTRY(ia64_ret_from_clone) + PT_REGS_UNWIND_INFO(0) +{ /* + * Some versions of gas generate bad unwind info if the first instruction of a + * procedure doesn't go into the first slot of a bundle. This is a workaround. + */ + nop.m 0 + nop.i 0 + /* + * We need to call schedule_tail() to complete the scheduling process. + * Called by ia64_switch_to() after ia64_clone()->copy_thread(). r8 contains the + * address of the previously executing task. + */ + br.call.sptk.many rp=ia64_invoke_schedule_tail +} +.ret8: +(pKStk) br.call.sptk.many rp=call_payload + adds r2=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; + ld4 r2=[r2] + ;; + mov r8=0 + and r2=_TIF_SYSCALL_TRACEAUDIT,r2 + ;; + cmp.ne p6,p0=r2,r0 +(p6) br.cond.spnt .strace_check_retval + ;; // added stop bits to prevent r8 dependency +END(ia64_ret_from_clone) + // fall through +GLOBAL_ENTRY(ia64_ret_from_syscall) + PT_REGS_UNWIND_INFO(0) + cmp.ge p6,p7=r8,r0 // syscall executed successfully? + adds r2=PT(R8)+16,sp // r2 = &pt_regs.r8 + mov r10=r0 // clear error indication in r10 +(p7) br.cond.spnt handle_syscall_error // handle potential syscall failure +END(ia64_ret_from_syscall) + // fall through + +/* + * ia64_leave_syscall(): Same as ia64_leave_kernel, except that it doesn't + * need to switch to bank 0 and doesn't restore the scratch registers. + * To avoid leaking kernel bits, the scratch registers are set to + * the following known-to-be-safe values: + * + * r1: restored (global pointer) + * r2: cleared + * r3: 1 (when returning to user-level) + * r8-r11: restored (syscall return value(s)) + * r12: restored (user-level stack pointer) + * r13: restored (user-level thread pointer) + * r14: set to __kernel_syscall_via_epc + * r15: restored (syscall #) + * r16-r17: cleared + * r18: user-level b6 + * r19: cleared + * r20: user-level ar.fpsr + * r21: user-level b0 + * r22: cleared + * r23: user-level ar.bspstore + * r24: user-level ar.rnat + * r25: user-level ar.unat + * r26: user-level ar.pfs + * r27: user-level ar.rsc + * r28: user-level ip + * r29: user-level psr + * r30: user-level cfm + * r31: user-level pr + * f6-f11: cleared + * pr: restored (user-level pr) + * b0: restored (user-level rp) + * b6: restored + * b7: set to __kernel_syscall_via_epc + * ar.unat: restored (user-level ar.unat) + * ar.pfs: restored (user-level ar.pfs) + * ar.rsc: restored (user-level ar.rsc) + * ar.rnat: restored (user-level ar.rnat) + * ar.bspstore: restored (user-level ar.bspstore) + * ar.fpsr: restored (user-level ar.fpsr) + * ar.ccv: cleared + * ar.csd: cleared + * ar.ssd: cleared + */ +GLOBAL_ENTRY(ia64_leave_syscall) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. mustn't get changed by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on. + * + * p6 controls whether current_thread_info()->flags needs to be check for + * extra work. We always check for extra work when returning to user-level. + * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count + * is 0. After extra work processing has been completed, execution + * resumes at ia64_work_processed_syscall with p6 set to 1 if the extra-work-check + * needs to be redone. + */ +#ifdef CONFIG_PREEMPTION + RSM_PSR_I(p0, r2, r18) // disable interrupts + cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; + .pred.rel.mutex pUStk,pKStk +(pKStk) ld4 r21=[r20] // r21 <- preempt_count +(pUStk) mov r21=0 // r21 <- 0 + ;; + cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) +#else /* !CONFIG_PREEMPTION */ + RSM_PSR_I(pUStk, r2, r18) + cmp.eq pLvSys,p0=r0,r0 // pLvSys=1: leave from syscall +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +#endif +.global ia64_work_processed_syscall; +ia64_work_processed_syscall: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + adds r2=PT(LOADRS)+16,r12 + MOV_FROM_ITC(pUStk, p9, r22, r19) // fetch time at leave + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + adds r3=PT(AR_BSPSTORE)+16,r12 // deferred + ;; +#else + adds r2=PT(LOADRS)+16,r12 + adds r3=PT(AR_BSPSTORE)+16,r12 + adds r18=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r18] // load current_thread_info()->flags + ld8 r19=[r2],PT(B6)-PT(LOADRS) // load ar.rsc value for "loadrs" + nop.i 0 + ;; +#endif + mov r16=ar.bsp // M2 get existing backing store pointer + ld8 r18=[r2],PT(R9)-PT(B6) // load b6 +(p6) and r15=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; + ld8 r23=[r3],PT(R11)-PT(AR_BSPSTORE) // load ar.bspstore (may be garbage) +(p6) cmp4.ne.unc p6,p0=r15, r0 // any special work pending? +(p6) br.cond.spnt .work_pending_syscall + ;; + // start restoring the state saved on the kernel stack (struct pt_regs): + ld8 r9=[r2],PT(CR_IPSR)-PT(R9) + ld8 r11=[r3],PT(CR_IIP)-PT(R11) +(pNonSys) break 0 // bug check: we shouldn't be here if pNonSys is TRUE! + ;; + invala // M0|1 invalidate ALAT + RSM_PSR_I_IC(r28, r29, r30) // M2 turn off interrupts and interruption collection + cmp.eq p9,p0=r0,r0 // A set p9 to indicate that we should restore cr.ifs + + ld8 r29=[r2],16 // M0|1 load cr.ipsr + ld8 r28=[r3],16 // M0|1 load cr.iip +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) add r14=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + ld8 r25=[r3],16 // M0|1 load ar.unat +(pUStk) add r15=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; +#else + mov r22=r0 // A clear r22 + ;; + ld8 r30=[r2],16 // M0|1 load cr.ifs + ld8 r25=[r3],16 // M0|1 load ar.unat +(pUStk) add r14=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; +#endif + ld8 r26=[r2],PT(B0)-PT(AR_PFS) // M0|1 load ar.pfs + MOV_FROM_PSR(pKStk, r22, r21) // M2 read PSR now that interrupts are disabled + nop 0 + ;; + ld8 r21=[r2],PT(AR_RNAT)-PT(B0) // M0|1 load b0 + ld8 r27=[r3],PT(PR)-PT(AR_RSC) // M0|1 load ar.rsc + mov f6=f0 // F clear f6 + ;; + ld8 r24=[r2],PT(AR_FPSR)-PT(AR_RNAT) // M0|1 load ar.rnat (may be garbage) + ld8 r31=[r3],PT(R1)-PT(PR) // M0|1 load predicates + mov f7=f0 // F clear f7 + ;; + ld8 r20=[r2],PT(R12)-PT(AR_FPSR) // M0|1 load ar.fpsr + ld8.fill r1=[r3],16 // M0|1 load r1 +(pUStk) mov r17=1 // A + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) st1 [r15]=r17 // M2|3 +#else +(pUStk) st1 [r14]=r17 // M2|3 +#endif + ld8.fill r13=[r3],16 // M0|1 + mov f8=f0 // F clear f8 + ;; + ld8.fill r12=[r2] // M0|1 restore r12 (sp) + ld8.fill r15=[r3] // M0|1 restore r15 + mov b6=r18 // I0 restore b6 + + LOAD_PHYS_STACK_REG_SIZE(r17) + mov f9=f0 // F clear f9 +(pKStk) br.cond.dpnt.many skip_rbs_switch // B + + srlz.d // M0 ensure interruption collection is off (for cover) + shr.u r18=r19,16 // I0|1 get byte size of existing "dirty" partition + COVER // B add current frame into dirty partition & set cr.ifs + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + mov r19=ar.bsp // M2 get new backing store pointer + st8 [r14]=r22 // M save time at leave + mov f10=f0 // F clear f10 + + mov r22=r0 // A clear r22 + movl r14=__kernel_syscall_via_epc // X + ;; +#else + mov r19=ar.bsp // M2 get new backing store pointer + mov f10=f0 // F clear f10 + + nop.m 0 + movl r14=__kernel_syscall_via_epc // X + ;; +#endif + mov.m ar.csd=r0 // M2 clear ar.csd + mov.m ar.ccv=r0 // M2 clear ar.ccv + mov b7=r14 // I0 clear b7 (hint with __kernel_syscall_via_epc) + + mov.m ar.ssd=r0 // M2 clear ar.ssd + mov f11=f0 // F clear f11 + br.cond.sptk.many rbs_switch // B +END(ia64_leave_syscall) + +GLOBAL_ENTRY(ia64_leave_kernel) + PT_REGS_UNWIND_INFO(0) + /* + * work.need_resched etc. mustn't get changed by this CPU before it returns to + * user- or fsys-mode, hence we disable interrupts early on. + * + * p6 controls whether current_thread_info()->flags needs to be check for + * extra work. We always check for extra work when returning to user-level. + * With CONFIG_PREEMPTION, we also check for extra work when the preempt_count + * is 0. After extra work processing has been completed, execution + * resumes at .work_processed_syscall with p6 set to 1 if the extra-work-check + * needs to be redone. + */ +#ifdef CONFIG_PREEMPTION + RSM_PSR_I(p0, r17, r31) // disable interrupts + cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel +(pKStk) adds r20=TI_PRE_COUNT+IA64_TASK_SIZE,r13 + ;; + .pred.rel.mutex pUStk,pKStk +(pKStk) ld4 r21=[r20] // r21 <- preempt_count +(pUStk) mov r21=0 // r21 <- 0 + ;; + cmp.eq p6,p0=r21,r0 // p6 <- pUStk || (preempt_count == 0) +#else + RSM_PSR_I(pUStk, r17, r31) + cmp.eq p0,pLvSys=r0,r0 // pLvSys=0: leave from kernel +(pUStk) cmp.eq.unc p6,p0=r0,r0 // p6 <- pUStk +#endif +.work_processed_kernel: + adds r17=TI_FLAGS+IA64_TASK_SIZE,r13 + ;; +(p6) ld4 r31=[r17] // load current_thread_info()->flags + adds r21=PT(PR)+16,r12 + ;; + + lfetch [r21],PT(CR_IPSR)-PT(PR) + adds r2=PT(B6)+16,r12 + adds r3=PT(R16)+16,r12 + ;; + lfetch [r21] + ld8 r28=[r2],8 // load b6 + adds r29=PT(R24)+16,r12 + + ld8.fill r16=[r3],PT(AR_CSD)-PT(R16) + adds r30=PT(AR_CCV)+16,r12 +(p6) and r19=TIF_WORK_MASK,r31 // any work other than TIF_SYSCALL_TRACE? + ;; + ld8.fill r24=[r29] + ld8 r15=[r30] // load ar.ccv +(p6) cmp4.ne.unc p6,p0=r19, r0 // any special work pending? + ;; + ld8 r29=[r2],16 // load b7 + ld8 r30=[r3],16 // load ar.csd +(p6) br.cond.spnt .work_pending + ;; + ld8 r31=[r2],16 // load ar.ssd + ld8.fill r8=[r3],16 + ;; + ld8.fill r9=[r2],16 + ld8.fill r10=[r3],PT(R17)-PT(R10) + ;; + ld8.fill r11=[r2],PT(R18)-PT(R11) + ld8.fill r17=[r3],16 + ;; + ld8.fill r18=[r2],16 + ld8.fill r19=[r3],16 + ;; + ld8.fill r20=[r2],16 + ld8.fill r21=[r3],16 + mov ar.csd=r30 + mov ar.ssd=r31 + ;; + RSM_PSR_I_IC(r23, r22, r25) // initiate turning off of interrupt and interruption collection + invala // invalidate ALAT + ;; + ld8.fill r22=[r2],24 + ld8.fill r23=[r3],24 + mov b6=r28 + ;; + ld8.fill r25=[r2],16 + ld8.fill r26=[r3],16 + mov b7=r29 + ;; + ld8.fill r27=[r2],16 + ld8.fill r28=[r3],16 + ;; + ld8.fill r29=[r2],16 + ld8.fill r30=[r3],24 + ;; + ld8.fill r31=[r2],PT(F9)-PT(R31) + adds r3=PT(F10)-PT(F6),r3 + ;; + ldf.fill f9=[r2],PT(F6)-PT(F9) + ldf.fill f10=[r3],PT(F8)-PT(F10) + ;; + ldf.fill f6=[r2],PT(F7)-PT(F6) + ;; + ldf.fill f7=[r2],PT(F11)-PT(F7) + ldf.fill f8=[r3],32 + ;; + srlz.d // ensure that inter. collection is off (VHPT is don't care, since text is pinned) + mov ar.ccv=r15 + ;; + ldf.fill f11=[r2] + BSW_0(r2, r3, r15) // switch back to bank 0 (no stop bit required beforehand...) + ;; +(pUStk) mov r18=IA64_KR(CURRENT)// M2 (12 cycle read latency) + adds r16=PT(CR_IPSR)+16,r12 + adds r17=PT(CR_IIP)+16,r12 + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + .pred.rel.mutex pUStk,pKStk + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled + MOV_FROM_ITC(pUStk, p9, r22, r29) // M fetch time at leave + nop.i 0 + ;; +#else + MOV_FROM_PSR(pKStk, r22, r29) // M2 read PSR now that interrupts are disabled + nop.i 0 + nop.i 0 + ;; +#endif + ld8 r29=[r16],16 // load cr.ipsr + ld8 r28=[r17],16 // load cr.iip + ;; + ld8 r30=[r16],16 // load cr.ifs + ld8 r25=[r17],16 // load ar.unat + ;; + ld8 r26=[r16],16 // load ar.pfs + ld8 r27=[r17],16 // load ar.rsc + cmp.eq p9,p0=r0,r0 // set p9 to indicate that we should restore cr.ifs + ;; + ld8 r24=[r16],16 // load ar.rnat (may be garbage) + ld8 r23=[r17],16 // load ar.bspstore (may be garbage) + ;; + ld8 r31=[r16],16 // load predicates + ld8 r21=[r17],16 // load b0 + ;; + ld8 r19=[r16],16 // load ar.rsc value for "loadrs" + ld8.fill r1=[r17],16 // load r1 + ;; + ld8.fill r12=[r16],16 + ld8.fill r13=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) adds r3=TI_AC_LEAVE+IA64_TASK_SIZE,r18 +#else +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 +#endif + ;; + ld8 r20=[r16],16 // ar.fpsr + ld8.fill r15=[r17],16 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +(pUStk) adds r18=IA64_TASK_THREAD_ON_USTACK_OFFSET,r18 // deferred +#endif + ;; + ld8.fill r14=[r16],16 + ld8.fill r2=[r17] +(pUStk) mov r17=1 + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + // mmi_ : ld8 st1 shr;; mmi_ : st8 st1 shr;; + // mib : mov add br -> mib : ld8 add br + // bbb_ : br nop cover;; mbb_ : mov br cover;; + // + // no one require bsp in r16 if (pKStk) branch is selected. +(pUStk) st8 [r3]=r22 // save time at leave +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + ld8.fill r3=[r16] // deferred + LOAD_PHYS_STACK_REG_SIZE(r17) +(pKStk) br.cond.dpnt skip_rbs_switch + mov r16=ar.bsp // get existing backing store pointer +#else + ld8.fill r3=[r16] +(pUStk) st1 [r18]=r17 // restore current->thread.on_ustack + shr.u r18=r19,16 // get byte size of existing "dirty" partition + ;; + mov r16=ar.bsp // get existing backing store pointer + LOAD_PHYS_STACK_REG_SIZE(r17) +(pKStk) br.cond.dpnt skip_rbs_switch +#endif + + /* + * Restore user backing store. + * + * NOTE: alloc, loadrs, and cover can't be predicated. + */ +(pNonSys) br.cond.dpnt dont_preserve_current_frame + COVER // add current frame into dirty partition and set cr.ifs + ;; + mov r19=ar.bsp // get new backing store pointer +rbs_switch: + sub r16=r16,r18 // krbs = old bsp - size of dirty partition + cmp.ne p9,p0=r0,r0 // clear p9 to skip restore of cr.ifs + ;; + sub r19=r19,r16 // calculate total byte size of dirty partition + add r18=64,r18 // don't force in0-in7 into memory... + ;; + shl r19=r19,16 // shift size of dirty partition into loadrs position + ;; +dont_preserve_current_frame: + /* + * To prevent leaking bits between the kernel and user-space, + * we must clear the stacked registers in the "invalid" partition here. + * Not pretty, but at least it's fast (3.34 registers/cycle on Itanium, + * 5 registers/cycle on McKinley). + */ +# define pRecurse p6 +# define pReturn p7 +#ifdef CONFIG_ITANIUM +# define Nregs 10 +#else +# define Nregs 14 +#endif + alloc loc0=ar.pfs,2,Nregs-2,2,0 + shr.u loc1=r18,9 // RNaTslots <= floor(dirtySize / (64*8)) + sub r17=r17,r18 // r17 = (physStackedSize + 8) - dirtySize + ;; + mov ar.rsc=r19 // load ar.rsc to be used for "loadrs" + shladd in0=loc1,3,r17 + mov in1=0 + ;; + TEXT_ALIGN(32) +rse_clear_invalid: +#ifdef CONFIG_ITANIUM + // cycle 0 + { .mii + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 +}{ .mfb + add out1=1,in1 // increment recursion count + nop.f 0 + nop.b 0 // can't do br.call here because of alloc (WAW on CFM) + ;; +}{ .mfi // cycle 1 + mov loc1=0 + nop.f 0 + mov loc2=0 +}{ .mib + mov loc3=0 + mov loc4=0 +(pRecurse) br.call.sptk.many b0=rse_clear_invalid + +}{ .mfi // cycle 2 + mov loc5=0 + nop.f 0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret +}{ .mib + mov loc6=0 + mov loc7=0 +(pReturn) br.ret.sptk.many b0 +} +#else /* !CONFIG_ITANIUM */ + alloc loc0=ar.pfs,2,Nregs-2,2,0 + cmp.lt pRecurse,p0=Nregs*8,in0 // if more than Nregs regs left to clear, (re)curse + add out0=-Nregs*8,in0 + add out1=1,in1 // increment recursion count + mov loc1=0 + mov loc2=0 + ;; + mov loc3=0 + mov loc4=0 + mov loc5=0 + mov loc6=0 + mov loc7=0 +(pRecurse) br.call.dptk.few b0=rse_clear_invalid + ;; + mov loc8=0 + mov loc9=0 + cmp.ne pReturn,p0=r0,in1 // if recursion count != 0, we need to do a br.ret + mov loc10=0 + mov loc11=0 +(pReturn) br.ret.dptk.many b0 +#endif /* !CONFIG_ITANIUM */ +# undef pRecurse +# undef pReturn + ;; + alloc r17=ar.pfs,0,0,0,0 // drop current register frame + ;; + loadrs + ;; +skip_rbs_switch: + mov ar.unat=r25 // M2 +(pKStk) extr.u r22=r22,21,1 // I0 extract current value of psr.pp from r22 +(pLvSys)mov r19=r0 // A clear r19 for leave_syscall, no-op otherwise + ;; +(pUStk) mov ar.bspstore=r23 // M2 +(pKStk) dep r29=r22,r29,21,1 // I0 update ipsr.pp with psr.pp +(pLvSys)mov r16=r0 // A clear r16 for leave_syscall, no-op otherwise + ;; + MOV_TO_IPSR(p0, r29, r25) // M2 + mov ar.pfs=r26 // I0 +(pLvSys)mov r17=r0 // A clear r17 for leave_syscall, no-op otherwise + + MOV_TO_IFS(p9, r30, r25)// M2 + mov b0=r21 // I0 +(pLvSys)mov r18=r0 // A clear r18 for leave_syscall, no-op otherwise + + mov ar.fpsr=r20 // M2 + MOV_TO_IIP(r28, r25) // M2 + nop 0 + ;; +(pUStk) mov ar.rnat=r24 // M2 must happen with RSE in lazy mode + nop 0 +(pLvSys)mov r2=r0 + + mov ar.rsc=r27 // M2 + mov pr=r31,-1 // I0 + RFI // B + + /* + * On entry: + * r20 = ¤t->thread_info->pre_count (if CONFIG_PREEMPTION) + * r31 = current->thread_info->flags + * On exit: + * p6 = TRUE if work-pending-check needs to be redone + * + * Interrupts are disabled on entry, reenabled depend on work, and + * disabled on exit. + */ +.work_pending_syscall: + add r2=-8,r2 + add r3=-8,r3 + ;; + st8 [r2]=r8 + st8 [r3]=r10 +.work_pending: + tbit.z p6,p0=r31,TIF_NEED_RESCHED // is resched not needed? +(p6) br.cond.sptk.few .notify + br.call.spnt.many rp=preempt_schedule_irq +.ret9: cmp.eq p6,p0=r0,r0 // p6 <- 1 (re-check) +(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end + br.cond.sptk.many .work_processed_kernel + +.notify: +(pUStk) br.call.spnt.many rp=notify_resume_user +.ret10: cmp.ne p6,p0=r0,r0 // p6 <- 0 (don't re-check) +(pLvSys)br.cond.sptk.few ia64_work_pending_syscall_end + br.cond.sptk.many .work_processed_kernel + +.global ia64_work_pending_syscall_end; +ia64_work_pending_syscall_end: + adds r2=PT(R8)+16,r12 + adds r3=PT(R10)+16,r12 + ;; + ld8 r8=[r2] + ld8 r10=[r3] + br.cond.sptk.many ia64_work_processed_syscall +END(ia64_leave_kernel) + +ENTRY(handle_syscall_error) + /* + * Some system calls (e.g., ptrace, mmap) can return arbitrary values which could + * lead us to mistake a negative return value as a failed syscall. Those syscall + * must deposit a non-zero value in pt_regs.r8 to indicate an error. If + * pt_regs.r8 is zero, we assume that the call completed successfully. + */ + PT_REGS_UNWIND_INFO(0) + ld8 r3=[r2] // load pt_regs.r8 + ;; + cmp.eq p6,p7=r3,r0 // is pt_regs.r8==0? + ;; +(p7) mov r10=-1 +(p7) sub r8=0,r8 // negate return value to get errno + br.cond.sptk ia64_leave_syscall +END(handle_syscall_error) + + /* + * Invoke schedule_tail(task) while preserving in0-in7, which may be needed + * in case a system call gets restarted. + */ +GLOBAL_ENTRY(ia64_invoke_schedule_tail) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,1,0 + mov loc0=rp + mov out0=r8 // Address of previous task + ;; + br.call.sptk.many rp=schedule_tail +.ret11: mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(ia64_invoke_schedule_tail) + + /* + * Setup stack and call do_notify_resume_user(), keeping interrupts + * disabled. + * + * Note that pSys and pNonSys need to be set up by the caller. + * We declare 8 input registers so the system call args get preserved, + * in case we need to restart a system call. + */ +GLOBAL_ENTRY(notify_resume_user) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(8) + alloc loc1=ar.pfs,8,2,3,0 // preserve all eight input regs in case of syscall restart! + mov r9=ar.unat + mov loc0=rp // save return address + mov out0=0 // there is no "oldset" + adds out1=8,sp // out1=&sigscratch->ar_pfs +(pSys) mov out2=1 // out2==1 => we're in a syscall + ;; +(pNonSys) mov out2=0 // out2==0 => not a syscall + .fframe 16 + .spillsp ar.unat, 16 + st8 [sp]=r9,-16 // allocate space for ar.unat and save it + st8 [out1]=loc1,-8 // save ar.pfs, out1=&sigscratch + .body + br.call.sptk.many rp=do_notify_resume_user +.ret15: .restore sp + adds sp=16,sp // pop scratch stack space + ;; + ld8 r9=[sp] // load new unat from sigscratch->scratch_unat + mov rp=loc0 + ;; + mov ar.unat=r9 + mov ar.pfs=loc1 + br.ret.sptk.many rp +END(notify_resume_user) + +ENTRY(sys_rt_sigreturn) + PT_REGS_UNWIND_INFO(0) + /* + * Allocate 8 input registers since ptrace() may clobber them + */ + alloc r2=ar.pfs,8,0,1,0 + .prologue + PT_REGS_SAVES(16) + adds sp=-16,sp + .body + cmp.eq pNonSys,pSys=r0,r0 // sigreturn isn't a normal syscall... + ;; + /* + * leave_kernel() restores f6-f11 from pt_regs, but since the streamlined + * syscall-entry path does not save them we save them here instead. Note: we + * don't need to save any other registers that are not saved by the stream-lined + * syscall path, because restore_sigcontext() restores them. + */ + adds r16=PT(F6)+32,sp + adds r17=PT(F7)+32,sp + ;; + stf.spill [r16]=f6,32 + stf.spill [r17]=f7,32 + ;; + stf.spill [r16]=f8,32 + stf.spill [r17]=f9,32 + ;; + stf.spill [r16]=f10 + stf.spill [r17]=f11 + adds out0=16,sp // out0 = &sigscratch + br.call.sptk.many rp=ia64_rt_sigreturn +.ret19: .restore sp,0 + adds sp=16,sp + ;; + ld8 r9=[sp] // load new ar.unat + mov.sptk b7=r8,ia64_leave_kernel + ;; + mov ar.unat=r9 + br.many b7 +END(sys_rt_sigreturn) + +GLOBAL_ENTRY(ia64_prepare_handle_unaligned) + .prologue + /* + * r16 = fake ar.pfs, we simply need to make sure privilege is still 0 + */ + mov r16=r0 + DO_SAVE_SWITCH_STACK + br.call.sptk.many rp=ia64_handle_unaligned // stack frame setup in ivt +.ret21: .body + DO_LOAD_SWITCH_STACK + br.cond.sptk.many rp // goes to ia64_leave_kernel +END(ia64_prepare_handle_unaligned) + + // + // unw_init_running(void (*callback)(info, arg), void *arg) + // +# define EXTRA_FRAME_SIZE ((UNW_FRAME_INFO_SIZE+15)&~15) + +GLOBAL_ENTRY(unw_init_running) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,3,3,0 + ;; + ld8 loc2=[in0],8 + mov loc0=rp + mov r16=loc1 + DO_SAVE_SWITCH_STACK + .body + + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + .fframe IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE + SWITCH_STACK_SAVES(EXTRA_FRAME_SIZE) + adds sp=-EXTRA_FRAME_SIZE,sp + .body + ;; + adds out0=16,sp // &info + mov out1=r13 // current + adds out2=16+EXTRA_FRAME_SIZE,sp // &switch_stack + br.call.sptk.many rp=unw_init_frame_info +1: adds out0=16,sp // &info + mov b6=loc2 + mov loc2=gp // save gp across indirect function call + ;; + ld8 gp=[in0] + mov out1=in1 // arg + br.call.sptk.many rp=b6 // invoke the callback function +1: mov gp=loc2 // restore gp + + // For now, we don't allow changing registers from within + // unw_init_running; if we ever want to allow that, we'd + // have to do a load_switch_stack here: + .restore sp + adds sp=IA64_SWITCH_STACK_SIZE+EXTRA_FRAME_SIZE,sp + + mov ar.pfs=loc1 + mov rp=loc0 + br.ret.sptk.many rp +END(unw_init_running) +EXPORT_SYMBOL(unw_init_running) + +#ifdef CONFIG_FUNCTION_TRACER +#ifdef CONFIG_DYNAMIC_FTRACE +GLOBAL_ENTRY(_mcount) + br ftrace_stub +END(_mcount) +EXPORT_SYMBOL(_mcount) + +.here: + br.ret.sptk.many b0 + +GLOBAL_ENTRY(ftrace_caller) + alloc out0 = ar.pfs, 8, 0, 4, 0 + mov out3 = r0 + ;; + mov out2 = b0 + add r3 = 0x20, r3 + mov out1 = r1; + br.call.sptk.many b0 = ftrace_patch_gp + //this might be called from module, so we must patch gp +ftrace_patch_gp: + movl gp=__gp + mov b0 = r3 + ;; +.global ftrace_call; +ftrace_call: +{ + .mlx + nop.m 0x0 + movl r3 = .here;; +} + alloc loc0 = ar.pfs, 4, 4, 2, 0 + ;; + mov loc1 = b0 + mov out0 = b0 + mov loc2 = r8 + mov loc3 = r15 + ;; + adds out0 = -MCOUNT_INSN_SIZE, out0 + mov out1 = in2 + mov b6 = r3 + + br.call.sptk.many b0 = b6 + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov r8 = loc2 + mov r15 = loc3 + br ftrace_stub + ;; +END(ftrace_caller) + +#else +GLOBAL_ENTRY(_mcount) + movl r2 = ftrace_stub + movl r3 = ftrace_trace_function;; + ld8 r3 = [r3];; + ld8 r3 = [r3];; + cmp.eq p7,p0 = r2, r3 +(p7) br.sptk.many ftrace_stub + ;; + + alloc loc0 = ar.pfs, 4, 4, 2, 0 + ;; + mov loc1 = b0 + mov out0 = b0 + mov loc2 = r8 + mov loc3 = r15 + ;; + adds out0 = -MCOUNT_INSN_SIZE, out0 + mov out1 = in2 + mov b6 = r3 + + br.call.sptk.many b0 = b6 + ;; + mov ar.pfs = loc0 + mov b0 = loc1 + mov r8 = loc2 + mov r15 = loc3 + br ftrace_stub + ;; +END(_mcount) +#endif + +GLOBAL_ENTRY(ftrace_stub) + mov r3 = b0 + movl r2 = _mcount_ret_helper + ;; + mov b6 = r2 + mov b7 = r3 + br.ret.sptk.many b6 + +_mcount_ret_helper: + mov b0 = r42 + mov r1 = r41 + mov ar.pfs = r40 + br b7 +END(ftrace_stub) + +#endif /* CONFIG_FUNCTION_TRACER */ + +#define __SYSCALL(nr, entry) data8 entry + .rodata + .align 8 + .globl sys_call_table +sys_call_table: +#include diff --git a/arch/ia64/kernel/entry.h b/arch/ia64/kernel/entry.h new file mode 100644 index 000000000000..6463dc316263 --- /dev/null +++ b/arch/ia64/kernel/entry.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * Preserved registers that are shared between code in ivt.S and + * entry.S. Be careful not to step on these! + */ +#define PRED_LEAVE_SYSCALL 1 /* TRUE iff leave from syscall */ +#define PRED_KERNEL_STACK 2 /* returning to kernel-stacks? */ +#define PRED_USER_STACK 3 /* returning to user-stacks? */ +#define PRED_SYSCALL 4 /* inside a system call? */ +#define PRED_NON_SYSCALL 5 /* complement of PRED_SYSCALL */ + +#ifdef __ASSEMBLY__ +# define PASTE2(x,y) x##y +# define PASTE(x,y) PASTE2(x,y) + +# define pLvSys PASTE(p,PRED_LEAVE_SYSCALL) +# define pKStk PASTE(p,PRED_KERNEL_STACK) +# define pUStk PASTE(p,PRED_USER_STACK) +# define pSys PASTE(p,PRED_SYSCALL) +# define pNonSys PASTE(p,PRED_NON_SYSCALL) +#endif + +#define PT(f) (IA64_PT_REGS_##f##_OFFSET) +#define SW(f) (IA64_SWITCH_STACK_##f##_OFFSET) +#define SOS(f) (IA64_SAL_OS_STATE_##f##_OFFSET) + +#define PT_REGS_SAVES(off) \ + .unwabi 3, 'i'; \ + .fframe IA64_PT_REGS_SIZE+16+(off); \ + .spillsp rp, PT(CR_IIP)+16+(off); \ + .spillsp ar.pfs, PT(CR_IFS)+16+(off); \ + .spillsp ar.unat, PT(AR_UNAT)+16+(off); \ + .spillsp ar.fpsr, PT(AR_FPSR)+16+(off); \ + .spillsp pr, PT(PR)+16+(off); + +#define PT_REGS_UNWIND_INFO(off) \ + .prologue; \ + PT_REGS_SAVES(off); \ + .body + +#define SWITCH_STACK_SAVES(off) \ + .savesp ar.unat,SW(CALLER_UNAT)+16+(off); \ + .savesp ar.fpsr,SW(AR_FPSR)+16+(off); \ + .spillsp f2,SW(F2)+16+(off); .spillsp f3,SW(F3)+16+(off); \ + .spillsp f4,SW(F4)+16+(off); .spillsp f5,SW(F5)+16+(off); \ + .spillsp f16,SW(F16)+16+(off); .spillsp f17,SW(F17)+16+(off); \ + .spillsp f18,SW(F18)+16+(off); .spillsp f19,SW(F19)+16+(off); \ + .spillsp f20,SW(F20)+16+(off); .spillsp f21,SW(F21)+16+(off); \ + .spillsp f22,SW(F22)+16+(off); .spillsp f23,SW(F23)+16+(off); \ + .spillsp f24,SW(F24)+16+(off); .spillsp f25,SW(F25)+16+(off); \ + .spillsp f26,SW(F26)+16+(off); .spillsp f27,SW(F27)+16+(off); \ + .spillsp f28,SW(F28)+16+(off); .spillsp f29,SW(F29)+16+(off); \ + .spillsp f30,SW(F30)+16+(off); .spillsp f31,SW(F31)+16+(off); \ + .spillsp r4,SW(R4)+16+(off); .spillsp r5,SW(R5)+16+(off); \ + .spillsp r6,SW(R6)+16+(off); .spillsp r7,SW(R7)+16+(off); \ + .spillsp b0,SW(B0)+16+(off); .spillsp b1,SW(B1)+16+(off); \ + .spillsp b2,SW(B2)+16+(off); .spillsp b3,SW(B3)+16+(off); \ + .spillsp b4,SW(B4)+16+(off); .spillsp b5,SW(B5)+16+(off); \ + .spillsp ar.pfs,SW(AR_PFS)+16+(off); .spillsp ar.lc,SW(AR_LC)+16+(off); \ + .spillsp @priunat,SW(AR_UNAT)+16+(off); \ + .spillsp ar.rnat,SW(AR_RNAT)+16+(off); \ + .spillsp ar.bspstore,SW(AR_BSPSTORE)+16+(off); \ + .spillsp pr,SW(PR)+16+(off) + +#define DO_SAVE_SWITCH_STACK \ + movl r28=1f; \ + ;; \ + .fframe IA64_SWITCH_STACK_SIZE; \ + adds sp=-IA64_SWITCH_STACK_SIZE,sp; \ + mov.ret.sptk b7=r28,1f; \ + SWITCH_STACK_SAVES(0); \ + br.cond.sptk.many save_switch_stack; \ +1: + +#define DO_LOAD_SWITCH_STACK \ + movl r28=1f; \ + ;; \ + invala; \ + mov.ret.sptk b7=r28,1f; \ + br.cond.sptk.many load_switch_stack; \ +1: .restore sp; \ + adds sp=IA64_SWITCH_STACK_SIZE,sp diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c new file mode 100644 index 000000000000..dd5bfed52031 --- /dev/null +++ b/arch/ia64/kernel/err_inject.c @@ -0,0 +1,273 @@ +/* + * err_inject.c - + * 1.) Inject errors to a processor. + * 2.) Query error injection capabilities. + * This driver along with user space code can be acting as an error + * injection tool. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Written by: Fenghua Yu , Intel Corporation + * Copyright (C) 2006, Intel Corp. All rights reserved. + * + */ +#include +#include +#include +#include +#include + +#define ERR_INJ_DEBUG + +#define ERR_DATA_BUFFER_SIZE 3 // Three 8-byte; + +#define define_one_ro(name) \ +static DEVICE_ATTR(name, 0444, show_##name, NULL) + +#define define_one_rw(name) \ +static DEVICE_ATTR(name, 0644, show_##name, store_##name) + +static u64 call_start[NR_CPUS]; +static u64 phys_addr[NR_CPUS]; +static u64 err_type_info[NR_CPUS]; +static u64 err_struct_info[NR_CPUS]; +static struct { + u64 data1; + u64 data2; + u64 data3; +} __attribute__((__aligned__(16))) err_data_buffer[NR_CPUS]; +static s64 status[NR_CPUS]; +static u64 capabilities[NR_CPUS]; +static u64 resources[NR_CPUS]; + +#define show(name) \ +static ssize_t \ +show_##name(struct device *dev, struct device_attribute *attr, \ + char *buf) \ +{ \ + u32 cpu=dev->id; \ + return sprintf(buf, "%llx\n", name[cpu]); \ +} + +#define store(name) \ +static ssize_t \ +store_##name(struct device *dev, struct device_attribute *attr, \ + const char *buf, size_t size) \ +{ \ + unsigned int cpu=dev->id; \ + name[cpu] = simple_strtoull(buf, NULL, 16); \ + return size; \ +} + +show(call_start) + +/* It's user's responsibility to call the PAL procedure on a specific + * processor. The cpu number in driver is only used for storing data. + */ +static ssize_t +store_call_start(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + unsigned long call_start = simple_strtoull(buf, NULL, 16); + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu); + printk(KERN_DEBUG "err_type_info=%llx,\n", err_type_info[cpu]); + printk(KERN_DEBUG "err_struct_info=%llx,\n", err_struct_info[cpu]); + printk(KERN_DEBUG "err_data_buffer=%llx, %llx, %llx.\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +#endif + switch (call_start) { + case 0: /* Do nothing. */ + break; + case 1: /* Call pal_mc_error_inject in physical mode. */ + status[cpu]=ia64_pal_mc_error_inject_phys(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + case 2: /* Call pal_mc_error_inject in virtual mode. */ + status[cpu]=ia64_pal_mc_error_inject_virt(err_type_info[cpu], + err_struct_info[cpu], + ia64_tpa(&err_data_buffer[cpu]), + &capabilities[cpu], + &resources[cpu]); + break; + default: + status[cpu] = -EINVAL; + break; + } + +#ifdef ERR_INJ_DEBUG + printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]); + printk(KERN_DEBUG "capabilities=%llx,\n", capabilities[cpu]); + printk(KERN_DEBUG "resources=%llx\n", resources[cpu]); +#endif + return size; +} + +show(err_type_info) +store(err_type_info) + +static ssize_t +show_virtual_to_phys(struct device *dev, struct device_attribute *attr, + char *buf) +{ + unsigned int cpu=dev->id; + return sprintf(buf, "%llx\n", phys_addr[cpu]); +} + +static ssize_t +store_virtual_to_phys(struct device *dev, struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + u64 virt_addr=simple_strtoull(buf, NULL, 16); + int ret; + + ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL); + if (ret<=0) { +#ifdef ERR_INJ_DEBUG + printk("Virtual address %llx is not existing.\n", virt_addr); +#endif + return -EINVAL; + } + + phys_addr[cpu] = ia64_tpa(virt_addr); + return size; +} + +show(err_struct_info) +store(err_struct_info) + +static ssize_t +show_err_data_buffer(struct device *dev, + struct device_attribute *attr, char *buf) +{ + unsigned int cpu=dev->id; + + return sprintf(buf, "%llx, %llx, %llx\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3); +} + +static ssize_t +store_err_data_buffer(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t size) +{ + unsigned int cpu=dev->id; + int ret; + +#ifdef ERR_INJ_DEBUG + printk("write err_data_buffer=[%llx,%llx,%llx] on cpu%d\n", + err_data_buffer[cpu].data1, + err_data_buffer[cpu].data2, + err_data_buffer[cpu].data3, + cpu); +#endif + ret = sscanf(buf, "%llx, %llx, %llx", + &err_data_buffer[cpu].data1, + &err_data_buffer[cpu].data2, + &err_data_buffer[cpu].data3); + if (ret!=ERR_DATA_BUFFER_SIZE) + return -EINVAL; + + return size; +} + +show(status) +show(capabilities) +show(resources) + +define_one_rw(call_start); +define_one_rw(err_type_info); +define_one_rw(err_struct_info); +define_one_rw(err_data_buffer); +define_one_rw(virtual_to_phys); +define_one_ro(status); +define_one_ro(capabilities); +define_one_ro(resources); + +static struct attribute *default_attrs[] = { + &dev_attr_call_start.attr, + &dev_attr_virtual_to_phys.attr, + &dev_attr_err_type_info.attr, + &dev_attr_err_struct_info.attr, + &dev_attr_err_data_buffer.attr, + &dev_attr_status.attr, + &dev_attr_capabilities.attr, + &dev_attr_resources.attr, + NULL +}; + +static struct attribute_group err_inject_attr_group = { + .attrs = default_attrs, + .name = "err_inject" +}; +/* Add/Remove err_inject interface for CPU device */ +static int err_inject_add_dev(unsigned int cpu) +{ + struct device *sys_dev = get_cpu_device(cpu); + + return sysfs_create_group(&sys_dev->kobj, &err_inject_attr_group); +} + +static int err_inject_remove_dev(unsigned int cpu) +{ + struct device *sys_dev = get_cpu_device(cpu); + + sysfs_remove_group(&sys_dev->kobj, &err_inject_attr_group); + return 0; +} + +static enum cpuhp_state hp_online; + +static int __init err_inject_init(void) +{ + int ret; +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Enter error injection driver.\n"); +#endif + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/err_inj:online", + err_inject_add_dev, err_inject_remove_dev); + if (ret >= 0) { + hp_online = ret; + ret = 0; + } + return ret; +} + +static void __exit err_inject_exit(void) +{ +#ifdef ERR_INJ_DEBUG + printk(KERN_INFO "Exit error injection driver.\n"); +#endif + cpuhp_remove_state(hp_online); +} + +module_init(err_inject_init); +module_exit(err_inject_exit); + +MODULE_AUTHOR("Fenghua Yu "); +MODULE_DESCRIPTION("MC error injection kernel sysfs interface"); +MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/esi.c b/arch/ia64/kernel/esi.c new file mode 100644 index 000000000000..4df57c93e0a8 --- /dev/null +++ b/arch/ia64/kernel/esi.c @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Extensible SAL Interface (ESI) support routines. + * + * Copyright (C) 2006 Hewlett-Packard Co + * Alex Williamson + */ +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Alex Williamson "); +MODULE_DESCRIPTION("Extensible SAL Interface (ESI) support"); +MODULE_LICENSE("GPL"); + +#define MODULE_NAME "esi" + +enum esi_systab_entry_type { + ESI_DESC_ENTRY_POINT = 0 +}; + +/* + * Entry type: Size: + * 0 48 + */ +#define ESI_DESC_SIZE(type) "\060"[(unsigned) (type)] + +typedef struct ia64_esi_desc_entry_point { + u8 type; + u8 reserved1[15]; + u64 esi_proc; + u64 gp; + efi_guid_t guid; +} ia64_esi_desc_entry_point_t; + +struct pdesc { + void *addr; + void *gp; +}; + +static struct ia64_sal_systab *esi_systab; + +extern unsigned long esi_phys; + +static int __init esi_init (void) +{ + struct ia64_sal_systab *systab; + char *p; + int i; + + if (esi_phys == EFI_INVALID_TABLE_ADDR) + return -ENODEV; + + systab = __va(esi_phys); + + if (strncmp(systab->signature, "ESIT", 4) != 0) { + printk(KERN_ERR "bad signature in ESI system table!"); + return -ENODEV; + } + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. + */ + switch (*p) { + case ESI_DESC_ENTRY_POINT: + break; + default: + printk(KERN_WARNING "Unknown table type %d found in " + "ESI table, ignoring rest of table\n", *p); + return -ENODEV; + } + + p += ESI_DESC_SIZE(*p); + } + + esi_systab = systab; + return 0; +} + + +int ia64_esi_call (efi_guid_t guid, struct ia64_sal_retval *isrvp, + enum esi_proc_type proc_type, u64 func, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags = 0; + int i; + char *p; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = __va(esi->esi_proc); + pdesc.gp = __va(esi->gp); + + esi_proc = (ia64_sal_handler) &pdesc; + + ia64_save_scratch_fpregs(fr); + if (proc_type == ESI_PROC_SERIALIZED) + spin_lock_irqsave(&sal_lock, flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_save(flags); + else + preempt_disable(); + *isrvp = (*esi_proc)(func, arg1, arg2, arg3, + arg4, arg5, arg6, arg7); + if (proc_type == ESI_PROC_SERIALIZED) + spin_unlock_irqrestore(&sal_lock, + flags); + else if (proc_type == ESI_PROC_MP_SAFE) + local_irq_restore(flags); + else + preempt_enable(); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call); + +int ia64_esi_call_phys (efi_guid_t guid, struct ia64_sal_retval *isrvp, + u64 func, u64 arg1, u64 arg2, u64 arg3, u64 arg4, + u64 arg5, u64 arg6, u64 arg7) +{ + struct ia64_fpreg fr[6]; + unsigned long flags; + u64 esi_params[8]; + char *p; + int i; + + if (!esi_systab) + return -1; + + p = (char *) (esi_systab + 1); + for (i = 0; i < esi_systab->entry_count; i++) { + if (*p == ESI_DESC_ENTRY_POINT) { + ia64_esi_desc_entry_point_t *esi = (void *)p; + if (!efi_guidcmp(guid, esi->guid)) { + ia64_sal_handler esi_proc; + struct pdesc pdesc; + + pdesc.addr = (void *)esi->esi_proc; + pdesc.gp = (void *)esi->gp; + + esi_proc = (ia64_sal_handler) &pdesc; + + esi_params[0] = func; + esi_params[1] = arg1; + esi_params[2] = arg2; + esi_params[3] = arg3; + esi_params[4] = arg4; + esi_params[5] = arg5; + esi_params[6] = arg6; + esi_params[7] = arg7; + ia64_save_scratch_fpregs(fr); + spin_lock_irqsave(&sal_lock, flags); + *isrvp = esi_call_phys(esi_proc, esi_params); + spin_unlock_irqrestore(&sal_lock, flags); + ia64_load_scratch_fpregs(fr); + return 0; + } + } + p += ESI_DESC_SIZE(*p); + } + return -1; +} +EXPORT_SYMBOL_GPL(ia64_esi_call_phys); + +static void __exit esi_exit (void) +{ +} + +module_init(esi_init); +module_exit(esi_exit); /* makes module removable... */ diff --git a/arch/ia64/kernel/esi_stub.S b/arch/ia64/kernel/esi_stub.S new file mode 100644 index 000000000000..9928c5b2957c --- /dev/null +++ b/arch/ia64/kernel/esi_stub.S @@ -0,0 +1,99 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * ESI call stub. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Alex Williamson + * + * Based on EFI call stub by David Mosberger. The stub is virtually + * identical to the one for EFI phys-mode calls, except that ESI + * calls may have up to 8 arguments, so they get passed to this routine + * through memory. + * + * This stub allows us to make ESI calls in physical mode with interrupts + * turned off. ESI calls may not support calling from virtual mode. + * + * Google for "Extensible SAL specification" for a document describing the + * ESI standard. + */ + +/* + * PSR settings as per SAL spec (Chapter 8 in the "IA-64 System + * Abstraction Layer Specification", revision 2.6e). Note that + * psr.dfl and psr.dfh MUST be cleared, despite what this manual says. + * Otherwise, SAL dies whenever it's trying to do an IA-32 BIOS call + * (the br.ia instruction fails unless psr.dfl and psr.dfh are + * cleared). Fortunately, SAL promises not to touch the floating + * point regs, so at least we don't have to save f2-f127. + */ +#define PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_RT | \ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PSR_BITS_TO_SET \ + (IA64_PSR_BN) + +#include +#include +#include + +/* + * Inputs: + * in0 = address of function descriptor of ESI routine to call + * in1 = address of array of ESI parameters + * + * Outputs: + * r8 = result returned by called function + */ +GLOBAL_ENTRY(esi_call_phys) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(2) + alloc loc1=ar.pfs,2,7,8,0 + ld8 r2=[in0],8 // load ESI function's entry point + mov loc0=rp + .body + ;; + ld8 out0=[in1],8 // ESI params loaded from array + ;; // passing all as inputs doesn't work + ld8 out1=[in1],8 + ;; + ld8 out2=[in1],8 + ;; + ld8 out3=[in1],8 + ;; + ld8 out4=[in1],8 + ;; + ld8 out5=[in1],8 + ;; + ld8 out6=[in1],8 + ;; + ld8 out7=[in1] + mov loc2=gp // save global pointer + mov loc4=ar.rsc // save RSE configuration + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + ;; + ld8 gp=[in0] // load ESI function's global pointer + movl r16=PSR_BITS_TO_CLEAR + mov loc3=psr // save processor status word + movl r17=PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 + mov b6=r2 + ;; + andcm r16=loc3,r16 // get psr with IT, DT, and RT bits cleared + br.call.sptk.many rp=ia64_switch_mode_phys +.ret0: mov loc5=r19 // old ar.bsp + mov loc6=r20 // old sp + br.call.sptk.many rp=b6 // call the ESI function +.ret1: mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // save virtual mode psr + mov r19=loc5 // save virtual mode bspstore + mov r20=loc6 // save virtual mode sp + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode +.ret2: mov ar.rsc=loc4 // restore RSE configuration + mov ar.pfs=loc1 + mov rp=loc0 + mov gp=loc2 + br.ret.sptk.many rp +END(esi_call_phys) +EXPORT_SYMBOL_GPL(esi_call_phys) diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S new file mode 100644 index 000000000000..cc4733e9990a --- /dev/null +++ b/arch/ia64/kernel/fsys.S @@ -0,0 +1,837 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains the light-weight system call handlers (fsyscall-handlers). + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 25-Sep-03 davidm Implement fsys_rt_sigprocmask(). + * 18-Feb-03 louisk Implement fsys_gettimeofday(). + * 28-Feb-03 davidm Fixed several bugs in fsys_gettimeofday(). Tuned it some more, + * probably broke it along the way... ;-) + * 13-Jul-04 clameter Implement fsys_clock_gettime and revise fsys_gettimeofday to make + * it capable of using memory based clocks without falling back to C code. + * 08-Feb-07 Fenghua Yu Implement fsys_getcpu. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" +#include + +/* + * See Documentation/arch/ia64/fsys.rst for details on fsyscalls. + * + * On entry to an fsyscall handler: + * r10 = 0 (i.e., defaults to "successful syscall return") + * r11 = saved ar.pfs (a user-level value) + * r15 = system call number + * r16 = "current" task pointer (in normal kernel-mode, this is in r13) + * r32-r39 = system call arguments + * b6 = return address (a user-level value) + * ar.pfs = previous frame-state (a user-level value) + * PSR.be = cleared to zero (i.e., little-endian byte order is in effect) + * all other registers may contain values passed in from user-mode + * + * On return from an fsyscall handler: + * r11 = saved ar.pfs (as passed into the fsyscall handler) + * r15 = system call number (as passed into the fsyscall handler) + * r32-r39 = system call arguments (as passed into the fsyscall handler) + * b6 = return address (as passed into the fsyscall handler) + * ar.pfs = previous frame-state (as passed into the fsyscall handler) + */ + +ENTRY(fsys_ni_syscall) + .prologue + .altrp b6 + .body + mov r8=ENOSYS + mov r10=-1 + FSYS_RETURN +END(fsys_ni_syscall) + +ENTRY(fsys_getpid) + .prologue + .altrp b6 + .body + add r17=IA64_TASK_SIGNAL_OFFSET,r16 + ;; + ld8 r17=[r17] // r17 = current->signal + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + ;; + ld4 r9=[r9] + add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17 + ;; + and r9=TIF_ALLWORK_MASK,r9 + ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID] + ;; + add r8=IA64_PID_LEVEL_OFFSET,r17 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + mov r17=0 + ;; + cmp.ne p8,p0=0,r9 +(p8) br.spnt.many fsys_fallback_syscall + FSYS_RETURN +END(fsys_getpid) + +ENTRY(fsys_set_tid_address) + .prologue + .altrp b6 + .body + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 + add r17=IA64_TASK_THREAD_PID_OFFSET,r16 + ;; + ld4 r9=[r9] + tnat.z p6,p7=r32 // check argument register for being NaT + ld8 r17=[r17] // r17 = current->thread_pid + ;; + and r9=TIF_ALLWORK_MASK,r9 + add r8=IA64_PID_LEVEL_OFFSET,r17 + add r18=IA64_TASK_CLEAR_CHILD_TID_OFFSET,r16 + ;; + ld4 r8=[r8] // r8 = pid->level + add r17=IA64_PID_UPID_OFFSET,r17 // r17 = &pid->numbers[0] + ;; + shl r8=r8,IA64_UPID_SHIFT + ;; + add r17=r17,r8 // r17 = &pid->numbers[pid->level] + ;; + ld4 r8=[r17] // r8 = pid->numbers[pid->level].nr + ;; + cmp.ne p8,p0=0,r9 + mov r17=-1 + ;; +(p6) st8 [r18]=r32 +(p7) st8 [r18]=r17 +(p8) br.spnt.many fsys_fallback_syscall + ;; + mov r17=0 // i must not leak kernel bits... + mov r18=0 // i must not leak kernel bits... + FSYS_RETURN +END(fsys_set_tid_address) + +#if IA64_GTOD_SEQ_OFFSET !=0 +#error fsys_gettimeofday incompatible with changes to struct fsyscall_gtod_data_t +#endif +#if IA64_ITC_JITTER_OFFSET !=0 +#error fsys_gettimeofday incompatible with changes to struct itc_jitter_data_t +#endif +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_DIVIDE_BY_1000 0x4000 +#define CLOCK_ADD_MONOTONIC 0x8000 + +ENTRY(fsys_gettimeofday) + .prologue + .altrp b6 + .body + mov r31 = r32 + tnat.nz p6,p0 = r33 // guard against NaT argument +(p6) br.cond.spnt.few .fail_einval + mov r30 = CLOCK_DIVIDE_BY_1000 + ;; +.gettime: + // Register map + // Incoming r31 = pointer to address where to place result + // r30 = flags determining how time is processed + // r2,r3 = temp r4-r7 preserved + // r8 = result nanoseconds + // r9 = result seconds + // r10 = temporary storage for clock difference + // r11 = preserved: saved ar.pfs + // r12 = preserved: memory stack + // r13 = preserved: thread pointer + // r14 = address of mask / mask value + // r15 = preserved: system call number + // r16 = preserved: current task pointer + // r17 = (not used) + // r18 = (not used) + // r19 = address of itc_lastcycle + // r20 = struct fsyscall_gtod_data (= address of gtod_lock.sequence) + // r21 = address of mmio_ptr + // r22 = address of wall_time or monotonic_time + // r23 = address of shift / value + // r24 = address mult factor / cycle_last value + // r25 = itc_lastcycle value + // r26 = address clocksource cycle_last + // r27 = (not used) + // r28 = sequence number at the beginning of critical section + // r29 = address of itc_jitter + // r30 = time processing flags / memory address + // r31 = pointer to result + // Predicates + // p6,p7 short term use + // p8 = timesource ar.itc + // p9 = timesource mmio64 + // p10 = timesource mmio32 - not used + // p11 = timesource not to be handled by asm code + // p12 = memory time source ( = p9 | p10) - not used + // p13 = do cmpxchg with itc_lastcycle + // p14 = Divide by 1000 + // p15 = Add monotonic + // + // Note that instructions are optimized for McKinley. McKinley can + // process two bundles simultaneously and therefore we continuously + // try to feed the CPU two bundles and then a stop. + + add r2 = TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r31 // guard against Nat argument +(p6) br.cond.spnt.few .fail_einval + movl r20 = fsyscall_gtod_data // load fsyscall gettimeofday data address + ;; + ld4 r2 = [r2] // process work pending flags + movl r29 = itc_jitter_data // itc_jitter + add r22 = IA64_GTOD_WALL_TIME_OFFSET,r20 // wall_time + add r21 = IA64_CLKSRC_MMIO_OFFSET,r20 + mov pr = r30,0xc000 // Set predicates according to function + ;; + and r2 = TIF_ALLWORK_MASK,r2 + add r19 = IA64_ITC_LASTCYCLE_OFFSET,r29 +(p15) add r22 = IA64_GTOD_MONO_TIME_OFFSET,r20 // monotonic_time + ;; + add r26 = IA64_CLKSRC_CYCLE_LAST_OFFSET,r20 // clksrc_cycle_last + cmp.ne p6, p0 = 0, r2 // Fallback if work is scheduled +(p6) br.cond.spnt.many fsys_fallback_syscall + ;; + // Begin critical section +.time_redo: + ld4.acq r28 = [r20] // gtod_lock.sequence, Must take first + ;; + and r28 = ~1,r28 // And make sequence even to force retry if odd + ;; + ld8 r30 = [r21] // clocksource->mmio_ptr + add r24 = IA64_CLKSRC_MULT_OFFSET,r20 + ld4 r2 = [r29] // itc_jitter value + add r23 = IA64_CLKSRC_SHIFT_OFFSET,r20 + add r14 = IA64_CLKSRC_MASK_OFFSET,r20 + ;; + ld4 r3 = [r24] // clocksource mult value + ld8 r14 = [r14] // clocksource mask value + cmp.eq p8,p9 = 0,r30 // use cpu timer if no mmio_ptr + ;; + setf.sig f7 = r3 // Setup for mult scaling of counter +(p8) cmp.ne p13,p0 = r2,r0 // need itc_jitter compensation, set p13 + ld4 r23 = [r23] // clocksource shift value + ld8 r24 = [r26] // get clksrc_cycle_last value +(p9) cmp.eq p13,p0 = 0,r30 // if mmio_ptr, clear p13 jitter control + ;; + .pred.rel.mutex p8,p9 + MOV_FROM_ITC(p8, p6, r2, r10) // CPU_TIMER. 36 clocks latency!!! +(p9) ld8 r2 = [r30] // MMIO_TIMER. Could also have latency issues.. +(p13) ld8 r25 = [r19] // get itc_lastcycle value + ld8 r9 = [r22],IA64_TIME_SN_SPEC_SNSEC_OFFSET // sec + ;; + ld8 r8 = [r22],-IA64_TIME_SN_SPEC_SNSEC_OFFSET // snsec +(p13) sub r3 = r25,r2 // Diff needed before comparison (thanks davidm) + ;; +(p13) cmp.gt.unc p6,p7 = r3,r0 // check if it is less than last. p6,p7 cleared + sub r10 = r2,r24 // current_cycle - last_cycle + ;; +(p6) sub r10 = r25,r24 // time we got was less than last_cycle +(p7) mov ar.ccv = r25 // more than last_cycle. Prep for cmpxchg + ;; +(p7) cmpxchg8.rel r3 = [r19],r2,ar.ccv + ;; +(p7) cmp.ne p7,p0 = r25,r3 // if cmpxchg not successful + ;; +(p7) sub r10 = r3,r24 // then use new last_cycle instead + ;; + and r10 = r10,r14 // Apply mask + ;; + setf.sig f8 = r10 + nop.i 123 + ;; + // fault check takes 5 cycles and we have spare time +EX(.fail_efault, probe.w.fault r31, 3) + xmpy.l f8 = f8,f7 // nsec_per_cyc*(counter-last_counter) + ;; + getf.sig r2 = f8 + mf + ;; + ld4 r10 = [r20] // gtod_lock.sequence + add r8 = r8,r2 // Add xtime.nsecs + ;; + shr.u r8 = r8,r23 // shift by factor + cmp4.ne p7,p0 = r28,r10 +(p7) br.cond.dpnt.few .time_redo // sequence number changed, redo + // End critical section. + // Now r8=tv->tv_nsec and r9=tv->tv_sec + mov r10 = r0 + movl r2 = 1000000000 + add r23 = IA64_TIMESPEC_TV_NSEC_OFFSET, r31 +(p14) movl r3 = 2361183241434822607 // Prep for / 1000 hack + ;; +.time_normalize: + mov r21 = r8 + cmp.ge p6,p0 = r8,r2 +(p14) shr.u r20 = r8, 3 // We can repeat this if necessary just wasting time + ;; +(p14) setf.sig f8 = r20 +(p6) sub r8 = r8,r2 +(p6) add r9 = 1,r9 // two nops before the branch. +(p14) setf.sig f7 = r3 // Chances for repeats are 1 in 10000 for gettod +(p6) br.cond.dpnt.few .time_normalize + ;; + // Divided by 8 though shift. Now divide by 125 + // The compiler was able to do that with a multiply + // and a shift and we do the same +EX(.fail_efault, probe.w.fault r23, 3) // This also costs 5 cycles +(p14) xmpy.hu f8 = f8, f7 // xmpy has 5 cycles latency so use it + ;; +(p14) getf.sig r2 = f8 + ;; + mov r8 = r0 +(p14) shr.u r21 = r2, 4 + ;; +EX(.fail_efault, st8 [r31] = r9) +EX(.fail_efault, st8 [r23] = r21) + FSYS_RETURN +.fail_einval: + mov r8 = EINVAL + mov r10 = -1 + FSYS_RETURN +.fail_efault: + mov r8 = EFAULT + mov r10 = -1 + FSYS_RETURN +END(fsys_gettimeofday) + +ENTRY(fsys_clock_gettime) + .prologue + .altrp b6 + .body + cmp4.ltu p6, p0 = CLOCK_MONOTONIC, r32 + // Fallback if this is not CLOCK_REALTIME or CLOCK_MONOTONIC +(p6) br.spnt.few fsys_fallback_syscall + mov r31 = r33 + shl r30 = r32,15 + br.many .gettime +END(fsys_clock_gettime) + +/* + * fsys_getcpu doesn't use the third parameter in this implementation. It reads + * current_thread_info()->cpu and corresponding node in cpu_to_node_map. + */ +ENTRY(fsys_getcpu) + .prologue + .altrp b6 + .body + ;; + add r2=TI_FLAGS+IA64_TASK_SIZE,r16 + tnat.nz p6,p0 = r32 // guard against NaT argument + add r3=TI_CPU+IA64_TASK_SIZE,r16 + ;; + ld4 r3=[r3] // M r3 = thread_info->cpu + ld4 r2=[r2] // M r2 = thread_info->flags +(p6) br.cond.spnt.few .fail_einval // B + ;; + tnat.nz p7,p0 = r33 // I guard against NaT argument +(p7) br.cond.spnt.few .fail_einval // B + ;; + cmp.ne p6,p0=r32,r0 + cmp.ne p7,p0=r33,r0 + ;; +#ifdef CONFIG_NUMA + movl r17=cpu_to_node_map + ;; +EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles +EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles + shladd r18=r3,1,r17 + ;; + ld2 r20=[r18] // r20 = cpu_to_node_map[cpu] + and r2 = TIF_ALLWORK_MASK,r2 + ;; + cmp.ne p8,p0=0,r2 +(p8) br.spnt.many fsys_fallback_syscall + ;; + ;; +EX(.fail_efault, (p6) st4 [r32] = r3) +EX(.fail_efault, (p7) st2 [r33] = r20) + mov r8=0 + ;; +#else +EX(.fail_efault, (p6) probe.w.fault r32, 3) // M This takes 5 cycles +EX(.fail_efault, (p7) probe.w.fault r33, 3) // M This takes 5 cycles + and r2 = TIF_ALLWORK_MASK,r2 + ;; + cmp.ne p8,p0=0,r2 +(p8) br.spnt.many fsys_fallback_syscall + ;; +EX(.fail_efault, (p6) st4 [r32] = r3) +EX(.fail_efault, (p7) st2 [r33] = r0) + mov r8=0 + ;; +#endif + FSYS_RETURN +END(fsys_getcpu) + +ENTRY(fsys_fallback_syscall) + .prologue + .altrp b6 + .body + /* + * We only get here from light-weight syscall handlers. Thus, we already + * know that r15 contains a valid syscall number. No need to re-check. + */ + adds r17=-1024,r15 + movl r14=sys_call_table + ;; + RSM_PSR_I(p0, r26, r27) + shladd r18=r17,3,r14 + ;; + ld8 r18=[r18] // load normal (heavy-weight) syscall entry-point + MOV_FROM_PSR(p0, r29, r26) // read psr (12 cyc load latency) + mov r27=ar.rsc + mov r21=ar.fpsr + mov r26=ar.pfs +END(fsys_fallback_syscall) + /* FALL THROUGH */ +GLOBAL_ENTRY(fsys_bubble_down) + .prologue + .altrp b6 + .body + /* + * We get here for syscalls that don't have a lightweight + * handler. For those, we need to bubble down into the kernel + * and that requires setting up a minimal pt_regs structure, + * and initializing the CPU state more or less as if an + * interruption had occurred. To make syscall-restarts work, + * we setup pt_regs such that cr_iip points to the second + * instruction in syscall_via_break. Decrementing the IP + * hence will restart the syscall via break and not + * decrementing IP will return us to the caller, as usual. + * Note that we preserve the value of psr.pp rather than + * initializing it from dcr.pp. This makes it possible to + * distinguish fsyscall execution from other privileged + * execution. + * + * On entry: + * - normal fsyscall handler register usage, except + * that we also have: + * - r18: address of syscall entry point + * - r21: ar.fpsr + * - r26: ar.pfs + * - r27: ar.rsc + * - r29: psr + * + * We used to clear some PSR bits here but that requires slow + * serialization. Fortunately, that isn't really necessary. + * The rationale is as follows: we used to clear bits + * ~PSR_PRESERVED_BITS in PSR.L. Since + * PSR_PRESERVED_BITS==PSR.{UP,MFL,MFH,PK,DT,PP,SP,RT,IC}, we + * ended up clearing PSR.{BE,AC,I,DFL,DFH,DI,DB,SI,TB}. + * However, + * + * PSR.BE : already is turned off in __kernel_syscall_via_epc() + * PSR.AC : don't care (kernel normally turns PSR.AC on) + * PSR.I : already turned off by the time fsys_bubble_down gets + * invoked + * PSR.DFL: always 0 (kernel never turns it on) + * PSR.DFH: don't care --- kernel never touches f32-f127 on its own + * initiative + * PSR.DI : always 0 (kernel never turns it on) + * PSR.SI : always 0 (kernel never turns it on) + * PSR.DB : don't care --- kernel never enables kernel-level + * breakpoints + * PSR.TB : must be 0 already; if it wasn't zero on entry to + * __kernel_syscall_via_epc, the branch to fsys_bubble_down + * will trigger a taken branch; the taken-trap-handler then + * converts the syscall into a break-based system-call. + */ + /* + * Reading psr.l gives us only bits 0-31, psr.it, and psr.mc. + * The rest we have to synthesize. + */ +# define PSR_ONE_BITS ((3 << IA64_PSR_CPL0_BIT) \ + | (0x1 << IA64_PSR_RI_BIT) \ + | IA64_PSR_BN | IA64_PSR_I) + + invala // M0|1 + movl r14=ia64_ret_from_syscall // X + + nop.m 0 + movl r28=__kernel_syscall_via_break // X create cr.iip + ;; + + mov r2=r16 // A get task addr to addl-addressable register + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 // A + mov r31=pr // I0 save pr (2 cyc) + ;; + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag + addl r22=IA64_RBS_OFFSET,r2 // A compute base of RBS + add r3=TI_FLAGS+IA64_TASK_SIZE,r2 // A + ;; + ld4 r3=[r3] // M0|1 r3 = current_thread_info()->flags + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch register backing-store + nop.i 0 + ;; + mov ar.rsc=0 // M2 set enforced lazy mode, pl 0, LE, loadrs=0 +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + MOV_FROM_ITC(p0, p6, r30, r23) // M get cycle for accounting +#else + nop.m 0 +#endif + nop.i 0 + ;; + mov r23=ar.bspstore // M2 (12 cyc) save ar.bspstore + mov.m r24=ar.rnat // M2 (5 cyc) read ar.rnat (dual-issues!) + nop.i 0 + ;; + mov ar.bspstore=r22 // M2 (6 cyc) switch to kernel RBS + movl r8=PSR_ONE_BITS // X + ;; + mov r25=ar.unat // M2 (5 cyc) save ar.unat + mov r19=b6 // I0 save b6 (2 cyc) + mov r20=r1 // A save caller's gp in r20 + ;; + or r29=r8,r29 // A construct cr.ipsr value to save + mov b6=r18 // I0 copy syscall entry-point to b6 (7 cyc) + addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 // A compute base of memory stack + + mov r18=ar.bsp // M2 save (kernel) ar.bsp (12 cyc) + cmp.ne pKStk,pUStk=r0,r0 // A set pKStk <- 0, pUStk <- 1 + br.call.sptk.many b7=ia64_syscall_setup // B + ;; +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + // mov.m r30=ar.itc is called in advance + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r2 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r2 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at leave kernel + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r30,r19 // elapsed time in user mode + ;; + add r20=r20,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r20 // update stime + st8 [r17]=r21 // update utime + ;; +#endif + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + mov rp=r14 // I0 set the real return addr + and r3=_TIF_SYSCALL_TRACEAUDIT,r3 // A + ;; + SSM_PSR_I(p0, p6, r22) // M2 we're on kernel stacks now, reenable irqs + cmp.eq p8,p0=r3,r0 // A +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + + nop.m 0 +(p8) br.call.sptk.many b6=b6 // B (ignore return address) + br.cond.spnt ia64_trace_syscall // B +END(fsys_bubble_down) + + .rodata + .align 8 + .globl fsyscall_table + + data8 fsys_bubble_down +fsyscall_table: + data8 fsys_ni_syscall + data8 0 // exit // 1025 + data8 0 // read + data8 0 // write + data8 0 // open + data8 0 // close + data8 0 // creat // 1030 + data8 0 // link + data8 0 // unlink + data8 0 // execve + data8 0 // chdir + data8 0 // fchdir // 1035 + data8 0 // utimes + data8 0 // mknod + data8 0 // chmod + data8 0 // chown + data8 0 // lseek // 1040 + data8 fsys_getpid // getpid + data8 0 // getppid + data8 0 // mount + data8 0 // umount + data8 0 // setuid // 1045 + data8 0 // getuid + data8 0 // geteuid + data8 0 // ptrace + data8 0 // access + data8 0 // sync // 1050 + data8 0 // fsync + data8 0 // fdatasync + data8 0 // kill + data8 0 // rename + data8 0 // mkdir // 1055 + data8 0 // rmdir + data8 0 // dup + data8 0 // pipe + data8 0 // times + data8 0 // brk // 1060 + data8 0 // setgid + data8 0 // getgid + data8 0 // getegid + data8 0 // acct + data8 0 // ioctl // 1065 + data8 0 // fcntl + data8 0 // umask + data8 0 // chroot + data8 0 // ustat + data8 0 // dup2 // 1070 + data8 0 // setreuid + data8 0 // setregid + data8 0 // getresuid + data8 0 // setresuid + data8 0 // getresgid // 1075 + data8 0 // setresgid + data8 0 // getgroups + data8 0 // setgroups + data8 0 // getpgid + data8 0 // setpgid // 1080 + data8 0 // setsid + data8 0 // getsid + data8 0 // sethostname + data8 0 // setrlimit + data8 0 // getrlimit // 1085 + data8 0 // getrusage + data8 fsys_gettimeofday // gettimeofday + data8 0 // settimeofday + data8 0 // select + data8 0 // poll // 1090 + data8 0 // symlink + data8 0 // readlink + data8 0 // uselib + data8 0 // swapon + data8 0 // swapoff // 1095 + data8 0 // reboot + data8 0 // truncate + data8 0 // ftruncate + data8 0 // fchmod + data8 0 // fchown // 1100 + data8 0 // getpriority + data8 0 // setpriority + data8 0 // statfs + data8 0 // fstatfs + data8 0 // gettid // 1105 + data8 0 // semget + data8 0 // semop + data8 0 // semctl + data8 0 // msgget + data8 0 // msgsnd // 1110 + data8 0 // msgrcv + data8 0 // msgctl + data8 0 // shmget + data8 0 // shmat + data8 0 // shmdt // 1115 + data8 0 // shmctl + data8 0 // syslog + data8 0 // setitimer + data8 0 // getitimer + data8 0 // 1120 + data8 0 + data8 0 + data8 0 // vhangup + data8 0 // lchown + data8 0 // remap_file_pages // 1125 + data8 0 // wait4 + data8 0 // sysinfo + data8 0 // clone + data8 0 // setdomainname + data8 0 // newuname // 1130 + data8 0 // adjtimex + data8 0 + data8 0 // init_module + data8 0 // delete_module + data8 0 // 1135 + data8 0 + data8 0 // quotactl + data8 0 // bdflush + data8 0 // sysfs + data8 0 // personality // 1140 + data8 0 // afs_syscall + data8 0 // setfsuid + data8 0 // setfsgid + data8 0 // getdents + data8 0 // flock // 1145 + data8 0 // readv + data8 0 // writev + data8 0 // pread64 + data8 0 // pwrite64 + data8 0 // sysctl // 1150 + data8 0 // mmap + data8 0 // munmap + data8 0 // mlock + data8 0 // mlockall + data8 0 // mprotect // 1155 + data8 0 // mremap + data8 0 // msync + data8 0 // munlock + data8 0 // munlockall + data8 0 // sched_getparam // 1160 + data8 0 // sched_setparam + data8 0 // sched_getscheduler + data8 0 // sched_setscheduler + data8 0 // sched_yield + data8 0 // sched_get_priority_max // 1165 + data8 0 // sched_get_priority_min + data8 0 // sched_rr_get_interval + data8 0 // nanosleep + data8 0 // nfsservctl + data8 0 // prctl // 1170 + data8 0 // getpagesize + data8 0 // mmap2 + data8 0 // pciconfig_read + data8 0 // pciconfig_write + data8 0 // perfmonctl // 1175 + data8 0 // sigaltstack + data8 0 // rt_sigaction + data8 0 // rt_sigpending + data8 0 // rt_sigprocmask + data8 0 // rt_sigqueueinfo // 1180 + data8 0 // rt_sigreturn + data8 0 // rt_sigsuspend + data8 0 // rt_sigtimedwait + data8 0 // getcwd + data8 0 // capget // 1185 + data8 0 // capset + data8 0 // sendfile + data8 0 + data8 0 + data8 0 // socket // 1190 + data8 0 // bind + data8 0 // connect + data8 0 // listen + data8 0 // accept + data8 0 // getsockname // 1195 + data8 0 // getpeername + data8 0 // socketpair + data8 0 // send + data8 0 // sendto + data8 0 // recv // 1200 + data8 0 // recvfrom + data8 0 // shutdown + data8 0 // setsockopt + data8 0 // getsockopt + data8 0 // sendmsg // 1205 + data8 0 // recvmsg + data8 0 // pivot_root + data8 0 // mincore + data8 0 // madvise + data8 0 // newstat // 1210 + data8 0 // newlstat + data8 0 // newfstat + data8 0 // clone2 + data8 0 // getdents64 + data8 0 // getunwind // 1215 + data8 0 // readahead + data8 0 // setxattr + data8 0 // lsetxattr + data8 0 // fsetxattr + data8 0 // getxattr // 1220 + data8 0 // lgetxattr + data8 0 // fgetxattr + data8 0 // listxattr + data8 0 // llistxattr + data8 0 // flistxattr // 1225 + data8 0 // removexattr + data8 0 // lremovexattr + data8 0 // fremovexattr + data8 0 // tkill + data8 0 // futex // 1230 + data8 0 // sched_setaffinity + data8 0 // sched_getaffinity + data8 fsys_set_tid_address // set_tid_address + data8 0 // fadvise64_64 + data8 0 // tgkill // 1235 + data8 0 // exit_group + data8 0 // lookup_dcookie + data8 0 // io_setup + data8 0 // io_destroy + data8 0 // io_getevents // 1240 + data8 0 // io_submit + data8 0 // io_cancel + data8 0 // epoll_create + data8 0 // epoll_ctl + data8 0 // epoll_wait // 1245 + data8 0 // restart_syscall + data8 0 // semtimedop + data8 0 // timer_create + data8 0 // timer_settime + data8 0 // timer_gettime // 1250 + data8 0 // timer_getoverrun + data8 0 // timer_delete + data8 0 // clock_settime + data8 fsys_clock_gettime // clock_gettime + data8 0 // clock_getres // 1255 + data8 0 // clock_nanosleep + data8 0 // fstatfs64 + data8 0 // statfs64 + data8 0 // mbind + data8 0 // get_mempolicy // 1260 + data8 0 // set_mempolicy + data8 0 // mq_open + data8 0 // mq_unlink + data8 0 // mq_timedsend + data8 0 // mq_timedreceive // 1265 + data8 0 // mq_notify + data8 0 // mq_getsetattr + data8 0 // kexec_load + data8 0 // vserver + data8 0 // waitid // 1270 + data8 0 // add_key + data8 0 // request_key + data8 0 // keyctl + data8 0 // ioprio_set + data8 0 // ioprio_get // 1275 + data8 0 // move_pages + data8 0 // inotify_init + data8 0 // inotify_add_watch + data8 0 // inotify_rm_watch + data8 0 // migrate_pages // 1280 + data8 0 // openat + data8 0 // mkdirat + data8 0 // mknodat + data8 0 // fchownat + data8 0 // futimesat // 1285 + data8 0 // newfstatat + data8 0 // unlinkat + data8 0 // renameat + data8 0 // linkat + data8 0 // symlinkat // 1290 + data8 0 // readlinkat + data8 0 // fchmodat + data8 0 // faccessat + data8 0 + data8 0 // 1295 + data8 0 // unshare + data8 0 // splice + data8 0 // set_robust_list + data8 0 // get_robust_list + data8 0 // sync_file_range // 1300 + data8 0 // tee + data8 0 // vmsplice + data8 0 + data8 fsys_getcpu // getcpu // 1304 + + // fill in zeros for the remaining entries + .zero: + .space fsyscall_table + 8*NR_syscalls - .zero, 0 diff --git a/arch/ia64/kernel/fsyscall_gtod_data.h b/arch/ia64/kernel/fsyscall_gtod_data.h new file mode 100644 index 000000000000..cc2861445965 --- /dev/null +++ b/arch/ia64/kernel/fsyscall_gtod_data.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * (c) Copyright 2007 Hewlett-Packard Development Company, L.P. + * Contributed by Peter Keilty + * + * fsyscall gettimeofday data + */ + +/* like timespec, but includes "shifted nanoseconds" */ +struct time_sn_spec { + u64 sec; + u64 snsec; +}; + +struct fsyscall_gtod_data_t { + seqcount_t seq; + struct time_sn_spec wall_time; + struct time_sn_spec monotonic_time; + u64 clk_mask; + u32 clk_mult; + u32 clk_shift; + void *clk_fsys_mmio; + u64 clk_cycle_last; +} ____cacheline_aligned; + +struct itc_jitter_data_t { + int itc_jitter; + u64 itc_lastcycle; +} ____cacheline_aligned; + diff --git a/arch/ia64/kernel/ftrace.c b/arch/ia64/kernel/ftrace.c new file mode 100644 index 000000000000..d6360fd404ab --- /dev/null +++ b/arch/ia64/kernel/ftrace.c @@ -0,0 +1,196 @@ +/* + * Dynamic function tracing support. + * + * Copyright (C) 2008 Shaohua Li + * + * For licencing details, see COPYING. + * + * Defines low-level handling of mcount calls when the kernel + * is compiled with the -pg flag. When using dynamic ftrace, the + * mcount call-sites get patched lazily with NOP till they are + * enabled. All code mutation routines here take effect atomically. + */ + +#include +#include + +#include +#include + +/* In IA64, each function will be added below two bundles with -pg option */ +static unsigned char __attribute__((aligned(8))) +ftrace_orig_code[MCOUNT_INSN_SIZE] = { + 0x02, 0x40, 0x31, 0x10, 0x80, 0x05, /* alloc r40=ar.pfs,12,8,0 */ + 0xb0, 0x02, 0x00, 0x00, 0x42, 0x40, /* mov r43=r0;; */ + 0x05, 0x00, 0xc4, 0x00, /* mov r42=b0 */ + 0x11, 0x48, 0x01, 0x02, 0x00, 0x21, /* mov r41=r1 */ + 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, /* nop.i 0x0 */ + 0x08, 0x00, 0x00, 0x50 /* br.call.sptk.many b0 = _mcount;; */ +}; + +struct ftrace_orig_insn { + u64 dummy1, dummy2, dummy3; + u64 dummy4:64-41+13; + u64 imm20:20; + u64 dummy5:3; + u64 sign:1; + u64 dummy6:4; +}; + +/* mcount stub will be converted below for nop */ +static unsigned char ftrace_nop_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* nop.x 0x0;; */ + 0x00, 0x00, 0x04, 0x00 +}; + +static unsigned char *ftrace_nop_replace(void) +{ + return ftrace_nop_code; +} + +/* + * mcount stub will be converted below for call + * Note: Just the last instruction is changed against nop + * */ +static unsigned char __attribute__((aligned(8))) +ftrace_call_code[MCOUNT_INSN_SIZE] = { + 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MII] nop.m 0x0 */ + 0x30, 0x00, 0x00, 0x60, 0x00, 0x00, /* mov r3=ip */ + 0x00, 0x00, 0x04, 0x00, /* nop.i 0x0 */ + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0x0 */ + 0xff, 0xff, 0xff, 0xff, 0x7f, 0x00, /* brl.many .;;*/ + 0xf8, 0xff, 0xff, 0xc8 +}; + +struct ftrace_call_insn { + u64 dummy1, dummy2; + u64 dummy3:48; + u64 imm39_l:16; + u64 imm39_h:23; + u64 dummy4:13; + u64 imm20:20; + u64 dummy5:3; + u64 i:1; + u64 dummy6:4; +}; + +static unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr) +{ + struct ftrace_call_insn *code = (void *)ftrace_call_code; + unsigned long offset = addr - (ip + 0x10); + + code->imm39_l = offset >> 24; + code->imm39_h = offset >> 40; + code->imm20 = offset >> 4; + code->i = offset >> 63; + return ftrace_call_code; +} + +static int +ftrace_modify_code(unsigned long ip, unsigned char *old_code, + unsigned char *new_code, int do_check) +{ + unsigned char replaced[MCOUNT_INSN_SIZE]; + + /* + * Note: + * We are paranoid about modifying text, as if a bug was to happen, it + * could cause us to read or write to someplace that could cause harm. + * Carefully read and modify the code with probe_kernel_*(), and make + * sure what we read is what we expected it to be before modifying it. + */ + + if (!do_check) + goto skip_check; + + /* read the text we want to modify */ + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + + /* Make sure it is what we expect it to be */ + if (memcmp(replaced, old_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + +skip_check: + /* replace the text with the new text */ + if (copy_to_kernel_nofault(((void *)ip), new_code, MCOUNT_INSN_SIZE)) + return -EPERM; + flush_icache_range(ip, ip + MCOUNT_INSN_SIZE); + + return 0; +} + +static int ftrace_make_nop_check(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned char __attribute__((aligned(8))) replaced[MCOUNT_INSN_SIZE]; + unsigned long ip = rec->ip; + + if (copy_from_kernel_nofault(replaced, (void *)ip, MCOUNT_INSN_SIZE)) + return -EFAULT; + if (rec->flags & FTRACE_FL_CONVERTED) { + struct ftrace_call_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_call_code; + tmp_call = (void *)replaced; + call_insn->imm39_l = tmp_call->imm39_l; + call_insn->imm39_h = tmp_call->imm39_h; + call_insn->imm20 = tmp_call->imm20; + call_insn->i = tmp_call->i; + if (memcmp(replaced, ftrace_call_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } else { + struct ftrace_orig_insn *call_insn, *tmp_call; + + call_insn = (void *)ftrace_orig_code; + tmp_call = (void *)replaced; + call_insn->sign = tmp_call->sign; + call_insn->imm20 = tmp_call->imm20; + if (memcmp(replaced, ftrace_orig_code, MCOUNT_INSN_SIZE) != 0) + return -EINVAL; + return 0; + } +} + +int ftrace_make_nop(struct module *mod, + struct dyn_ftrace *rec, unsigned long addr) +{ + int ret; + char *new; + + ret = ftrace_make_nop_check(rec, addr); + if (ret) + return ret; + new = ftrace_nop_replace(); + return ftrace_modify_code(rec->ip, NULL, new, 0); +} + +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long ip = rec->ip; + unsigned char *old, *new; + + old= ftrace_nop_replace(); + new = ftrace_call_replace(ip, addr); + return ftrace_modify_code(ip, old, new, 1); +} + +/* in IA64, _mcount can't directly call ftrace_stub. Only jump is ok */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long ip; + unsigned long addr = ((struct fnptr *)ftrace_call)->ip; + + if (func == ftrace_stub) + return 0; + ip = ((struct fnptr *)func)->ip; + + ia64_patch_imm64(addr + 2, ip); + + flush_icache_range(addr, addr + 16); + return 0; +} diff --git a/arch/ia64/kernel/gate-data.S b/arch/ia64/kernel/gate-data.S new file mode 100644 index 000000000000..b3ef1c72e132 --- /dev/null +++ b/arch/ia64/kernel/gate-data.S @@ -0,0 +1,3 @@ + .section .data..gate, "aw" + + .incbin "arch/ia64/kernel/gate.so" diff --git a/arch/ia64/kernel/gate.S b/arch/ia64/kernel/gate.S new file mode 100644 index 000000000000..9f235cd551ab --- /dev/null +++ b/arch/ia64/kernel/gate.S @@ -0,0 +1,380 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains the code that gets mapped at the upper end of each task's text + * region. For now, it contains the signal trampoline code only. + * + * Copyright (C) 1999-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ + + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * We can't easily refer to symbols inside the kernel. To avoid full runtime relocation, + * complications with the linker (which likes to create PLT stubs for branches + * to targets outside the shared object) and to avoid multi-phase kernel builds, we + * simply create minimalistic "patch lists" in special ELF sections. + */ + .section ".data..patch.fsyscall_table", "a" + .previous +#define LOAD_FSYSCALL_TABLE(reg) \ +[1:] movl reg=0; \ + .xdata4 ".data..patch.fsyscall_table", 1b-. + + .section ".data..patch.brl_fsys_bubble_down", "a" + .previous +#define BRL_COND_FSYS_BUBBLE_DOWN(pr) \ +[1:](pr)brl.cond.sptk 0; \ + ;; \ + .xdata4 ".data..patch.brl_fsys_bubble_down", 1b-. + +GLOBAL_ENTRY(__kernel_syscall_via_break) + .prologue + .altrp b6 + .body + /* + * Note: for (fast) syscall restart to work, the break instruction must be + * the first one in the bundle addressed by syscall_via_break. + */ +{ .mib + break 0x100000 + nop.i 0 + br.ret.sptk.many b6 +} +END(__kernel_syscall_via_break) + +# define ARG0_OFF (16 + IA64_SIGFRAME_ARG0_OFFSET) +# define ARG1_OFF (16 + IA64_SIGFRAME_ARG1_OFFSET) +# define ARG2_OFF (16 + IA64_SIGFRAME_ARG2_OFFSET) +# define SIGHANDLER_OFF (16 + IA64_SIGFRAME_HANDLER_OFFSET) +# define SIGCONTEXT_OFF (16 + IA64_SIGFRAME_SIGCONTEXT_OFFSET) + +# define FLAGS_OFF IA64_SIGCONTEXT_FLAGS_OFFSET +# define CFM_OFF IA64_SIGCONTEXT_CFM_OFFSET +# define FR6_OFF IA64_SIGCONTEXT_FR6_OFFSET +# define BSP_OFF IA64_SIGCONTEXT_AR_BSP_OFFSET +# define RNAT_OFF IA64_SIGCONTEXT_AR_RNAT_OFFSET +# define UNAT_OFF IA64_SIGCONTEXT_AR_UNAT_OFFSET +# define FPSR_OFF IA64_SIGCONTEXT_AR_FPSR_OFFSET +# define PR_OFF IA64_SIGCONTEXT_PR_OFFSET +# define RP_OFF IA64_SIGCONTEXT_IP_OFFSET +# define SP_OFF IA64_SIGCONTEXT_R12_OFFSET +# define RBS_BASE_OFF IA64_SIGCONTEXT_RBS_BASE_OFFSET +# define LOADRS_OFF IA64_SIGCONTEXT_LOADRS_OFFSET +# define base0 r2 +# define base1 r3 + /* + * When we get here, the memory stack looks like this: + * + * +===============================+ + * | | + * // struct sigframe // + * | | + * +-------------------------------+ <-- sp+16 + * | 16 byte of scratch | + * | space | + * +-------------------------------+ <-- sp + * + * The register stack looks _exactly_ the way it looked at the time the signal + * occurred. In other words, we're treading on a potential mine-field: each + * incoming general register may be a NaT value (including sp, in which case the + * process ends up dying with a SIGSEGV). + * + * The first thing need to do is a cover to get the registers onto the backing + * store. Once that is done, we invoke the signal handler which may modify some + * of the machine state. After returning from the signal handler, we return + * control to the previous context by executing a sigreturn system call. A signal + * handler may call the rt_sigreturn() function to directly return to a given + * sigcontext. However, the user-level sigreturn() needs to do much more than + * calling the rt_sigreturn() system call as it needs to unwind the stack to + * restore preserved registers that may have been saved on the signal handler's + * call stack. + */ + +#define SIGTRAMP_SAVES \ + .unwabi 3, 's'; /* mark this as a sigtramp handler (saves scratch regs) */ \ + .unwabi @svr4, 's'; /* backwards compatibility with old unwinders (remove in v2.7) */ \ + .savesp ar.unat, UNAT_OFF+SIGCONTEXT_OFF; \ + .savesp ar.fpsr, FPSR_OFF+SIGCONTEXT_OFF; \ + .savesp pr, PR_OFF+SIGCONTEXT_OFF; \ + .savesp rp, RP_OFF+SIGCONTEXT_OFF; \ + .savesp ar.pfs, CFM_OFF+SIGCONTEXT_OFF; \ + .vframesp SP_OFF+SIGCONTEXT_OFF + +GLOBAL_ENTRY(__kernel_sigtramp) + // describe the state that is active when we get here: + .prologue + SIGTRAMP_SAVES + .body + + .label_state 1 + + adds base0=SIGHANDLER_OFF,sp + adds base1=RBS_BASE_OFF+SIGCONTEXT_OFF,sp + br.call.sptk.many rp=1f +1: + ld8 r17=[base0],(ARG0_OFF-SIGHANDLER_OFF) // get pointer to signal handler's plabel + ld8 r15=[base1] // get address of new RBS base (or NULL) + cover // push args in interrupted frame onto backing store + ;; + cmp.ne p1,p0=r15,r0 // do we need to switch rbs? (note: pr is saved by kernel) + mov.m r9=ar.bsp // fetch ar.bsp + .spillsp.p p1, ar.rnat, RNAT_OFF+SIGCONTEXT_OFF +(p1) br.cond.spnt setup_rbs // yup -> (clobbers p8, r14-r16, and r18-r20) +back_from_setup_rbs: + alloc r8=ar.pfs,0,0,3,0 + ld8 out0=[base0],16 // load arg0 (signum) + adds base1=(ARG1_OFF-(RBS_BASE_OFF+SIGCONTEXT_OFF)),base1 + ;; + ld8 out1=[base1] // load arg1 (siginfop) + ld8 r10=[r17],8 // get signal handler entry point + ;; + ld8 out2=[base0] // load arg2 (sigcontextp) + ld8 gp=[r17] // get signal handler's global pointer + adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + .spillsp ar.bsp, BSP_OFF+SIGCONTEXT_OFF + st8 [base0]=r9 // save sc_ar_bsp + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + stf.spill [base0]=f6,32 + stf.spill [base1]=f7,32 + ;; + stf.spill [base0]=f8,32 + stf.spill [base1]=f9,32 + mov b6=r10 + ;; + stf.spill [base0]=f10,32 + stf.spill [base1]=f11,32 + ;; + stf.spill [base0]=f12,32 + stf.spill [base1]=f13,32 + ;; + stf.spill [base0]=f14,32 + stf.spill [base1]=f15,32 + br.call.sptk.many rp=b6 // call the signal handler +.ret0: adds base0=(BSP_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r15=[base0] // fetch sc_ar_bsp + mov r14=ar.bsp + ;; + cmp.ne p1,p0=r14,r15 // do we need to restore the rbs? +(p1) br.cond.spnt restore_rbs // yup -> (clobbers r14-r18, f6 & f7) + ;; +back_from_restore_rbs: + adds base0=(FR6_OFF+SIGCONTEXT_OFF),sp + adds base1=(FR6_OFF+16+SIGCONTEXT_OFF),sp + ;; + ldf.fill f6=[base0],32 + ldf.fill f7=[base1],32 + ;; + ldf.fill f8=[base0],32 + ldf.fill f9=[base1],32 + ;; + ldf.fill f10=[base0],32 + ldf.fill f11=[base1],32 + ;; + ldf.fill f12=[base0],32 + ldf.fill f13=[base1],32 + ;; + ldf.fill f14=[base0],32 + ldf.fill f15=[base1],32 + mov r15=__NR_rt_sigreturn + .restore sp // pop .prologue + break __BREAK_SYSCALL + + .prologue + SIGTRAMP_SAVES +setup_rbs: + mov ar.rsc=0 // put RSE into enforced lazy mode + ;; + .save ar.rnat, r19 + mov r19=ar.rnat // save RNaT before switching backing store area + adds r14=(RNAT_OFF+SIGCONTEXT_OFF),sp + + mov r18=ar.bspstore + mov ar.bspstore=r15 // switch over to new register backing store area + ;; + + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF + st8 [r14]=r19 // save sc_ar_rnat + .body + mov.m r16=ar.bsp // sc_loadrs <- (new bsp - new bspstore) << 16 + adds r14=(LOADRS_OFF+SIGCONTEXT_OFF),sp + ;; + invala + sub r15=r16,r15 + extr.u r20=r18,3,6 + ;; + mov ar.rsc=0xf // set RSE into eager mode, pl 3 + cmp.eq p8,p0=63,r20 + shl r15=r15,16 + ;; + st8 [r14]=r15 // save sc_loadrs +(p8) st8 [r18]=r19 // if bspstore points at RNaT slot, store RNaT there now + .restore sp // pop .prologue + br.cond.sptk back_from_setup_rbs + + .prologue + SIGTRAMP_SAVES + .spillsp ar.rnat, RNAT_OFF+SIGCONTEXT_OFF + .body +restore_rbs: + // On input: + // r14 = bsp1 (bsp at the time of return from signal handler) + // r15 = bsp0 (bsp at the time the signal occurred) + // + // Here, we need to calculate bspstore0, the value that ar.bspstore needs + // to be set to, based on bsp0 and the size of the dirty partition on + // the alternate stack (sc_loadrs >> 16). This can be done with the + // following algorithm: + // + // bspstore0 = rse_skip_regs(bsp0, -rse_num_regs(bsp1 - (loadrs >> 19), bsp1)); + // + // This is what the code below does. + // + alloc r2=ar.pfs,0,0,0,0 // alloc null frame + adds r16=(LOADRS_OFF+SIGCONTEXT_OFF),sp + adds r18=(RNAT_OFF+SIGCONTEXT_OFF),sp + ;; + ld8 r17=[r16] + ld8 r16=[r18] // get new rnat + extr.u r18=r15,3,6 // r18 <- rse_slot_num(bsp0) + ;; + mov ar.rsc=r17 // put RSE into enforced lazy mode + shr.u r17=r17,16 + ;; + sub r14=r14,r17 // r14 (bspstore1) <- bsp1 - (sc_loadrs >> 16) + shr.u r17=r17,3 // r17 <- (sc_loadrs >> 19) + ;; + loadrs // restore dirty partition + extr.u r14=r14,3,6 // r14 <- rse_slot_num(bspstore1) + ;; + add r14=r14,r17 // r14 <- rse_slot_num(bspstore1) + (sc_loadrs >> 19) + ;; + shr.u r14=r14,6 // r14 <- (rse_slot_num(bspstore1) + (sc_loadrs >> 19))/0x40 + ;; + sub r14=r14,r17 // r14 <- -rse_num_regs(bspstore1, bsp1) + movl r17=0x8208208208208209 + ;; + add r18=r18,r14 // r18 (delta) <- rse_slot_num(bsp0) - rse_num_regs(bspstore1,bsp1) + setf.sig f7=r17 + cmp.lt p7,p0=r14,r0 // p7 <- (r14 < 0)? + ;; +(p7) adds r18=-62,r18 // delta -= 62 + ;; + setf.sig f6=r18 + ;; + xmpy.h f6=f6,f7 + ;; + getf.sig r17=f6 + ;; + add r17=r17,r18 + shr r18=r18,63 + ;; + shr r17=r17,5 + ;; + sub r17=r17,r18 // r17 = delta/63 + ;; + add r17=r14,r17 // r17 <- delta/63 - rse_num_regs(bspstore1, bsp1) + ;; + shladd r15=r17,3,r15 // r15 <- bsp0 + 8*(delta/63 - rse_num_regs(bspstore1, bsp1)) + ;; + mov ar.bspstore=r15 // switch back to old register backing store area + ;; + mov ar.rnat=r16 // restore RNaT + mov ar.rsc=0xf // (will be restored later on from sc_ar_rsc) + // invala not necessary as that will happen when returning to user-mode + br.cond.sptk back_from_restore_rbs +END(__kernel_sigtramp) + +/* + * On entry: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * b6 = return address + * On exit: + * r11 = saved ar.pfs + * r15 = system call # + * b0 = saved return address + * all other "scratch" registers: undefined + * all "preserved" registers: same as on entry + */ + +GLOBAL_ENTRY(__kernel_syscall_via_epc) + .prologue + .altrp b6 + .body +{ + /* + * Note: the kernel cannot assume that the first two instructions in this + * bundle get executed. The remaining code must be safe even if + * they do not get executed. + */ + adds r17=-1024,r15 // A + mov r10=0 // A default to successful syscall execution + epc // B causes split-issue +} + ;; + RSM_PSR_BE_I(r20, r22) // M2 (5 cyc to srlz.d) + LOAD_FSYSCALL_TABLE(r14) // X + ;; + mov r16=IA64_KR(CURRENT) // M2 (12 cyc) + shladd r18=r17,3,r14 // A + mov r19=NR_syscalls-1 // A + ;; + lfetch [r18] // M0|1 + MOV_FROM_PSR(p0, r29, r8) // M2 (12 cyc) + // If r17 is a NaT, p6 will be zero + cmp.geu p6,p7=r19,r17 // A (sysnr > 0 && sysnr < 1024+NR_syscalls)? + ;; + mov r21=ar.fpsr // M2 (12 cyc) + tnat.nz p10,p9=r15 // I0 + mov.i r26=ar.pfs // I0 (would stall anyhow due to srlz.d...) + ;; + srlz.d // M0 (forces split-issue) ensure PSR.BE==0 +(p6) ld8 r18=[r18] // M0|1 + nop.i 0 + ;; + nop.m 0 +(p6) tbit.z.unc p8,p0=r18,0 // I0 (dual-issues with "mov b7=r18"!) + nop.i 0 + ;; + SSM_PSR_I(p8, p14, r25) +(p6) mov b7=r18 // I0 +(p8) br.dptk.many b7 // B + + mov r27=ar.rsc // M2 (12 cyc) +/* + * brl.cond doesn't work as intended because the linker would convert this branch + * into a branch to a PLT. Perhaps there will be a way to avoid this with some + * future version of the linker. In the meantime, we just use an indirect branch + * instead. + */ +#ifdef CONFIG_ITANIUM +(p6) add r14=-8,r14 // r14 <- addr of fsys_bubble_down entry + ;; +(p6) ld8 r14=[r14] // r14 <- fsys_bubble_down + ;; +(p6) mov b7=r14 +(p6) br.sptk.many b7 +#else + BRL_COND_FSYS_BUBBLE_DOWN(p6) +#endif + SSM_PSR_I(p0, p14, r10) + mov r10=-1 +(p10) mov r8=EINVAL +(p9) mov r8=ENOSYS + FSYS_RETURN + +END(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/gate.lds.S b/arch/ia64/kernel/gate.lds.S new file mode 100644 index 000000000000..461c7e69d465 --- /dev/null +++ b/arch/ia64/kernel/gate.lds.S @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Linker script for gate DSO. The gate pages are an ELF shared object + * prelinked to its virtual address, with only one read-only segment and + * one execute-only segment (both fit in one page). This script controls + * its layout. + */ + +#include + +SECTIONS +{ + . = GATE_ADDR + SIZEOF_HEADERS; + + .hash : { *(.hash) } :readable + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note*) } :readable :note + + .dynamic : { *(.dynamic) } :readable :dynamic + + /* + * This linker script is used both with -r and with -shared. For + * the layouts to match, we need to skip more than enough space for + * the dynamic symbol table et al. If this amount is insufficient, + * ld -shared will barf. Just increase it here. + */ + . = GATE_ADDR + 0x600; + + .data..patch : { + __start_gate_mckinley_e9_patchlist = .; + *(.data..patch.mckinley_e9) + __end_gate_mckinley_e9_patchlist = .; + + __start_gate_vtop_patchlist = .; + *(.data..patch.vtop) + __end_gate_vtop_patchlist = .; + + __start_gate_fsyscall_patchlist = .; + *(.data..patch.fsyscall_table) + __end_gate_fsyscall_patchlist = .; + + __start_gate_brl_fsys_bubble_down_patchlist = .; + *(.data..patch.brl_fsys_bubble_down) + __end_gate_brl_fsys_bubble_down_patchlist = .; + } :readable + + .IA_64.unwind_info : { *(.IA_64.unwind_info*) } + .IA_64.unwind : { *(.IA_64.unwind*) } :readable :unwind +#ifdef HAVE_BUGGY_SEGREL + .text (GATE_ADDR + PAGE_SIZE) : { *(.text) *(.text.*) } :readable +#else + . = ALIGN(PERCPU_PAGE_SIZE) + (. & (PERCPU_PAGE_SIZE - 1)); + .text : { *(.text) *(.text.*) } :epc +#endif + + /DISCARD/ : { + *(.got.plt) *(.got) + *(.data .data.* .gnu.linkonce.d.*) + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.*) + *(__ex_table) + *(__mca_table) + } +} + +/* + * ld does not recognize this name token; use the constant. + */ +#define PT_IA_64_UNWIND 0x70000001 + +/* + * We must supply the ELF program headers explicitly to get just one + * PT_LOAD segment, and set the flags explicitly to make segments read-only. + */ +PHDRS +{ + readable PT_LOAD FILEHDR PHDRS FLAGS(4); /* PF_R */ +#ifndef HAVE_BUGGY_SEGREL + epc PT_LOAD FILEHDR PHDRS FLAGS(1); /* PF_X */ +#endif + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + note PT_NOTE FLAGS(4); /* PF_R */ + unwind PT_IA_64_UNWIND; +} + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + LINUX_2.5 { + global: + __kernel_syscall_via_break; + __kernel_syscall_via_epc; + __kernel_sigtramp; + + local: *; + }; +} + +/* The ELF entry point can be used to set the AT_SYSINFO value. */ +ENTRY(__kernel_syscall_via_epc) diff --git a/arch/ia64/kernel/head.S b/arch/ia64/kernel/head.S new file mode 100644 index 000000000000..85c8a57da402 --- /dev/null +++ b/arch/ia64/kernel/head.S @@ -0,0 +1,1167 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Here is where the ball gets rolling as far as the kernel is concerned. + * When control is transferred to _start, the bootload has already + * loaded us to the correct address. All that's left to do here is + * to set up the kernel's global pointer and jump to the kernel + * entry point. + * + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + * Copyright (C) 2002 Fenghua Yu + * -Optimize __ia64_save_fpu() and __ia64_load_fpu() for Itanium 2. + * Copyright (C) 2004 Ashok Raj + * Support for CPU Hotplug + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_HOTPLUG_CPU +#define SAL_PSR_BITS_TO_SET \ + (IA64_PSR_AC | IA64_PSR_BN | IA64_PSR_MFH | IA64_PSR_MFL) + +#define SAVE_FROM_REG(src, ptr, dest) \ + mov dest=src;; \ + st8 [ptr]=dest,0x08 + +#define RESTORE_REG(reg, ptr, _tmp) \ + ld8 _tmp=[ptr],0x08;; \ + mov reg=_tmp + +#define SAVE_BREAK_REGS(ptr, _idx, _breg, _dest)\ + mov ar.lc=IA64_NUM_DBG_REGS-1;; \ + mov _idx=0;; \ +1: \ + SAVE_FROM_REG(_breg[_idx], ptr, _dest);; \ + add _idx=1,_idx;; \ + br.cloop.sptk.many 1b + +#define RESTORE_BREAK_REGS(ptr, _idx, _breg, _tmp, _lbl)\ + mov ar.lc=IA64_NUM_DBG_REGS-1;; \ + mov _idx=0;; \ +_lbl: RESTORE_REG(_breg[_idx], ptr, _tmp);; \ + add _idx=1, _idx;; \ + br.cloop.sptk.many _lbl + +#define SAVE_ONE_RR(num, _reg, _tmp) \ + movl _tmp=(num<<61);; \ + mov _reg=rr[_tmp] + +#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ + SAVE_ONE_RR(0,_r0, _tmp);; \ + SAVE_ONE_RR(1,_r1, _tmp);; \ + SAVE_ONE_RR(2,_r2, _tmp);; \ + SAVE_ONE_RR(3,_r3, _tmp);; \ + SAVE_ONE_RR(4,_r4, _tmp);; \ + SAVE_ONE_RR(5,_r5, _tmp);; \ + SAVE_ONE_RR(6,_r6, _tmp);; \ + SAVE_ONE_RR(7,_r7, _tmp);; + +#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) \ + st8 [ptr]=_r0, 8;; \ + st8 [ptr]=_r1, 8;; \ + st8 [ptr]=_r2, 8;; \ + st8 [ptr]=_r3, 8;; \ + st8 [ptr]=_r4, 8;; \ + st8 [ptr]=_r5, 8;; \ + st8 [ptr]=_r6, 8;; \ + st8 [ptr]=_r7, 8;; + +#define RESTORE_REGION_REGS(ptr, _idx1, _idx2, _tmp) \ + mov ar.lc=0x08-1;; \ + movl _idx1=0x00;; \ +RestRR: \ + dep.z _idx2=_idx1,61,3;; \ + ld8 _tmp=[ptr],8;; \ + mov rr[_idx2]=_tmp;; \ + srlz.d;; \ + add _idx1=1,_idx1;; \ + br.cloop.sptk.few RestRR + +#define SET_AREA_FOR_BOOTING_CPU(reg1, reg2) \ + movl reg1=sal_state_for_booting_cpu;; \ + ld8 reg2=[reg1];; + +/* + * Adjust region registers saved before starting to save + * break regs and rest of the states that need to be preserved. + */ +#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(_reg1,_reg2,_pred) \ + SAVE_FROM_REG(b0,_reg1,_reg2);; \ + SAVE_FROM_REG(b1,_reg1,_reg2);; \ + SAVE_FROM_REG(b2,_reg1,_reg2);; \ + SAVE_FROM_REG(b3,_reg1,_reg2);; \ + SAVE_FROM_REG(b4,_reg1,_reg2);; \ + SAVE_FROM_REG(b5,_reg1,_reg2);; \ + st8 [_reg1]=r1,0x08;; \ + st8 [_reg1]=r12,0x08;; \ + st8 [_reg1]=r13,0x08;; \ + SAVE_FROM_REG(ar.fpsr,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.pfs,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.rnat,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.unat,_reg1,_reg2);; \ + SAVE_FROM_REG(ar.bspstore,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.dcr,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.iva,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.pta,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.itv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.pmv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.cmcv,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.lrr0,_reg1,_reg2);; \ + SAVE_FROM_REG(cr.lrr1,_reg1,_reg2);; \ + st8 [_reg1]=r4,0x08;; \ + st8 [_reg1]=r5,0x08;; \ + st8 [_reg1]=r6,0x08;; \ + st8 [_reg1]=r7,0x08;; \ + st8 [_reg1]=_pred,0x08;; \ + SAVE_FROM_REG(ar.lc, _reg1, _reg2);; \ + stf.spill.nta [_reg1]=f2,16;; \ + stf.spill.nta [_reg1]=f3,16;; \ + stf.spill.nta [_reg1]=f4,16;; \ + stf.spill.nta [_reg1]=f5,16;; \ + stf.spill.nta [_reg1]=f16,16;; \ + stf.spill.nta [_reg1]=f17,16;; \ + stf.spill.nta [_reg1]=f18,16;; \ + stf.spill.nta [_reg1]=f19,16;; \ + stf.spill.nta [_reg1]=f20,16;; \ + stf.spill.nta [_reg1]=f21,16;; \ + stf.spill.nta [_reg1]=f22,16;; \ + stf.spill.nta [_reg1]=f23,16;; \ + stf.spill.nta [_reg1]=f24,16;; \ + stf.spill.nta [_reg1]=f25,16;; \ + stf.spill.nta [_reg1]=f26,16;; \ + stf.spill.nta [_reg1]=f27,16;; \ + stf.spill.nta [_reg1]=f28,16;; \ + stf.spill.nta [_reg1]=f29,16;; \ + stf.spill.nta [_reg1]=f30,16;; \ + stf.spill.nta [_reg1]=f31,16;; + +#else +#define SET_AREA_FOR_BOOTING_CPU(a1, a2) +#define SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(a1,a2, a3) +#define SAVE_REGION_REGS(_tmp, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) +#define STORE_REGION_REGS(ptr, _r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7) +#endif + +#define SET_ONE_RR(num, pgsize, _tmp1, _tmp2, vhpt) \ + movl _tmp1=(num << 61);; \ + mov _tmp2=((ia64_rid(IA64_REGION_ID_KERNEL, (num<<61)) << 8) | (pgsize << 2) | vhpt);; \ + mov rr[_tmp1]=_tmp2 + + __PAGE_ALIGNED_DATA + + .global empty_zero_page +EXPORT_SYMBOL_GPL(empty_zero_page) +empty_zero_page: + .skip PAGE_SIZE + + .global swapper_pg_dir +swapper_pg_dir: + .skip PAGE_SIZE + + .rodata +halt_msg: + stringz "Halting kernel\n" + + __REF + + .global start_ap + + /* + * Start the kernel. When the bootloader passes control to _start(), r28 + * points to the address of the boot parameter area. Execution reaches + * here in physical mode. + */ +GLOBAL_ENTRY(_start) +start_ap: + .prologue + .save rp, r0 // terminate unwind chain with a NULL rp + .body + + rsm psr.i | psr.ic + ;; + srlz.i + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + /* + * Save the region registers, predicate before they get clobbered + */ + SAVE_REGION_REGS(r2, r8,r9,r10,r11,r12,r13,r14,r15); + mov r25=pr;; + + /* + * Initialize kernel region registers: + * rr[0]: VHPT enabled, page size = PAGE_SHIFT + * rr[1]: VHPT enabled, page size = PAGE_SHIFT + * rr[2]: VHPT enabled, page size = PAGE_SHIFT + * rr[3]: VHPT enabled, page size = PAGE_SHIFT + * rr[4]: VHPT enabled, page size = PAGE_SHIFT + * rr[5]: VHPT enabled, page size = PAGE_SHIFT + * rr[6]: VHPT disabled, page size = IA64_GRANULE_SHIFT + * rr[7]: VHPT disabled, page size = IA64_GRANULE_SHIFT + * We initialize all of them to prevent inadvertently assuming + * something about the state of address translation early in boot. + */ + SET_ONE_RR(0, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(1, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(2, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(3, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(4, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(5, PAGE_SHIFT, r2, r16, 1);; + SET_ONE_RR(6, IA64_GRANULE_SHIFT, r2, r16, 0);; + SET_ONE_RR(7, IA64_GRANULE_SHIFT, r2, r16, 0);; + /* + * Now pin mappings into the TLB for kernel text and data + */ + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + mov r3=ip + movl r18=PAGE_KERNEL + ;; + dep r2=0,r3,0,KERNEL_TR_PAGE_SHIFT + ;; + or r18=r2,r18 + ;; + srlz.i + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + srlz.i + + /* + * Switch into virtual mode: + */ + movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \ + |IA64_PSR_DI) + ;; + mov cr.ipsr=r16 + movl r17=1f + ;; + mov cr.iip=r17 + mov cr.ifs=r0 + ;; + rfi + ;; +1: // now we are in virtual mode + + SET_AREA_FOR_BOOTING_CPU(r2, r16); + + STORE_REGION_REGS(r16, r8,r9,r10,r11,r12,r13,r14,r15); + SAL_TO_OS_BOOT_HANDOFF_STATE_SAVE(r16,r17,r25) + ;; + + // set IVT entry point---can't access I/O ports without it + movl r3=ia64_ivt + ;; + mov cr.iva=r3 + movl r2=FPSR_DEFAULT + ;; + srlz.i + movl gp=__gp + + mov ar.fpsr=r2 + ;; + +#define isAP p2 // are we an Application Processor? +#define isBP p3 // are we the Bootstrap Processor? + +#ifdef CONFIG_SMP + /* + * Find the init_task for the currently booting CPU. At poweron, and in + * UP mode, task_for_booting_cpu is NULL. + */ + movl r3=task_for_booting_cpu + ;; + ld8 r3=[r3] + movl r2=init_task + ;; + cmp.eq isBP,isAP=r3,r0 + ;; +(isAP) mov r2=r3 +#else + movl r2=init_task + cmp.eq isBP,isAP=r0,r0 +#endif + ;; + tpa r3=r2 // r3 == phys addr of task struct + mov r16=-1 +(isBP) br.cond.dpnt .load_current // BP stack is on region 5 --- no need to map it + + // load mapping for stack (virtaddr in r2, physaddr in r3) + rsm psr.ic + movl r17=PAGE_KERNEL + ;; + srlz.d + dep r18=0,r3,0,12 + ;; + or r18=r17,r18 + dep r2=-1,r3,61,3 // IMVA of task + ;; + mov r17=rr[r2] + shr.u r16=r3,IA64_GRANULE_SHIFT + ;; + dep r17=0,r17,8,24 + ;; + mov cr.itir=r17 + mov cr.ifa=r2 + + mov r19=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r19]=r18 + ;; + ssm psr.ic + srlz.d + ;; + +.load_current: + // load the "current" pointer (r13) and ar.k6 with the current task + mov IA64_KR(CURRENT)=r2 // virtual address + mov IA64_KR(CURRENT_STACK)=r16 + mov r13=r2 + /* + * Reserve space at the top of the stack for "struct pt_regs". Kernel + * threads don't store interesting values in that structure, but the space + * still needs to be there because time-critical stuff such as the context + * switching can be implemented more efficiently (for example, __switch_to() + * always sets the psr.dfh bit of the task it is switching to). + */ + + addl r12=IA64_STK_OFFSET-IA64_PT_REGS_SIZE-16,r2 + addl r2=IA64_RBS_OFFSET,r2 // initialize the RSE + mov ar.rsc=0 // place RSE in enforced lazy mode + ;; + loadrs // clear the dirty partition + movl r19=__phys_per_cpu_start + mov r18=PERCPU_PAGE_SIZE + ;; +#ifndef CONFIG_SMP + add r19=r19,r18 + ;; +#else +(isAP) br.few 2f + movl r20=__cpu0_per_cpu + ;; + shr.u r18=r18,3 +1: + ld8 r21=[r19],8;; + st8[r20]=r21,8 + adds r18=-1,r18;; + cmp4.lt p7,p6=0,r18 +(p7) br.cond.dptk.few 1b + mov r19=r20 + ;; +2: +#endif + tpa r19=r19 + ;; + .pred.rel.mutex isBP,isAP +(isBP) mov IA64_KR(PER_CPU_DATA)=r19 // per-CPU base for cpu0 +(isAP) mov IA64_KR(PER_CPU_DATA)=r0 // clear physical per-CPU base + ;; + mov ar.bspstore=r2 // establish the new RSE stack + ;; + mov ar.rsc=0x3 // place RSE in eager mode + +(isBP) dep r28=-1,r28,61,3 // make address virtual +(isBP) movl r2=ia64_boot_param + ;; +(isBP) st8 [r2]=r28 // save the address of the boot param area passed by the bootloader + +#ifdef CONFIG_SMP +(isAP) br.call.sptk.many rp=start_secondary +.ret0: +(isAP) br.cond.sptk self +#endif + + // This is executed by the bootstrap processor (bsp) only: + + br.call.sptk.many rp=start_kernel +.ret2: addl r3=@ltoff(halt_msg),gp + ;; + alloc r2=ar.pfs,8,0,2,0 + ;; + ld8 out0=[r3] + br.call.sptk.many b0=console_print + +self: hint @pause + br.sptk.many self // endless loop +END(_start) + + .text + +GLOBAL_ENTRY(ia64_save_debug_regs) + alloc r16=ar.pfs,1,0,0,0 + mov r20=ar.lc // preserve ar.lc + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=0 + add r19=IA64_NUM_DBG_REGS*8,in0 + ;; +1: mov r16=dbr[r18] +#ifdef CONFIG_ITANIUM + ;; + srlz.d +#endif + mov r17=ibr[r18] + add r18=1,r18 + ;; + st8.nta [in0]=r16,8 + st8.nta [r19]=r17,8 + br.cloop.sptk.many 1b + ;; + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.many rp +END(ia64_save_debug_regs) + +GLOBAL_ENTRY(ia64_load_debug_regs) + alloc r16=ar.pfs,1,0,0,0 + lfetch.nta [in0] + mov r20=ar.lc // preserve ar.lc + add r19=IA64_NUM_DBG_REGS*8,in0 + mov ar.lc=IA64_NUM_DBG_REGS-1 + mov r18=-1 + ;; +1: ld8.nta r16=[in0],8 + ld8.nta r17=[r19],8 + add r18=1,r18 + ;; + mov dbr[r18]=r16 +#ifdef CONFIG_ITANIUM + ;; + srlz.d // Errata 132 (NoFix status) +#endif + mov ibr[r18]=r17 + br.cloop.sptk.many 1b + ;; + mov ar.lc=r20 // restore ar.lc + br.ret.sptk.many rp +END(ia64_load_debug_regs) + +GLOBAL_ENTRY(__ia64_save_fpu) + alloc r2=ar.pfs,1,4,0,0 + adds loc0=96*16-16,in0 + adds loc1=96*16-16-128,in0 + ;; + stf.spill.nta [loc0]=f127,-256 + stf.spill.nta [loc1]=f119,-256 + ;; + stf.spill.nta [loc0]=f111,-256 + stf.spill.nta [loc1]=f103,-256 + ;; + stf.spill.nta [loc0]=f95,-256 + stf.spill.nta [loc1]=f87,-256 + ;; + stf.spill.nta [loc0]=f79,-256 + stf.spill.nta [loc1]=f71,-256 + ;; + stf.spill.nta [loc0]=f63,-256 + stf.spill.nta [loc1]=f55,-256 + adds loc2=96*16-32,in0 + ;; + stf.spill.nta [loc0]=f47,-256 + stf.spill.nta [loc1]=f39,-256 + adds loc3=96*16-32-128,in0 + ;; + stf.spill.nta [loc2]=f126,-256 + stf.spill.nta [loc3]=f118,-256 + ;; + stf.spill.nta [loc2]=f110,-256 + stf.spill.nta [loc3]=f102,-256 + ;; + stf.spill.nta [loc2]=f94,-256 + stf.spill.nta [loc3]=f86,-256 + ;; + stf.spill.nta [loc2]=f78,-256 + stf.spill.nta [loc3]=f70,-256 + ;; + stf.spill.nta [loc2]=f62,-256 + stf.spill.nta [loc3]=f54,-256 + adds loc0=96*16-48,in0 + ;; + stf.spill.nta [loc2]=f46,-256 + stf.spill.nta [loc3]=f38,-256 + adds loc1=96*16-48-128,in0 + ;; + stf.spill.nta [loc0]=f125,-256 + stf.spill.nta [loc1]=f117,-256 + ;; + stf.spill.nta [loc0]=f109,-256 + stf.spill.nta [loc1]=f101,-256 + ;; + stf.spill.nta [loc0]=f93,-256 + stf.spill.nta [loc1]=f85,-256 + ;; + stf.spill.nta [loc0]=f77,-256 + stf.spill.nta [loc1]=f69,-256 + ;; + stf.spill.nta [loc0]=f61,-256 + stf.spill.nta [loc1]=f53,-256 + adds loc2=96*16-64,in0 + ;; + stf.spill.nta [loc0]=f45,-256 + stf.spill.nta [loc1]=f37,-256 + adds loc3=96*16-64-128,in0 + ;; + stf.spill.nta [loc2]=f124,-256 + stf.spill.nta [loc3]=f116,-256 + ;; + stf.spill.nta [loc2]=f108,-256 + stf.spill.nta [loc3]=f100,-256 + ;; + stf.spill.nta [loc2]=f92,-256 + stf.spill.nta [loc3]=f84,-256 + ;; + stf.spill.nta [loc2]=f76,-256 + stf.spill.nta [loc3]=f68,-256 + ;; + stf.spill.nta [loc2]=f60,-256 + stf.spill.nta [loc3]=f52,-256 + adds loc0=96*16-80,in0 + ;; + stf.spill.nta [loc2]=f44,-256 + stf.spill.nta [loc3]=f36,-256 + adds loc1=96*16-80-128,in0 + ;; + stf.spill.nta [loc0]=f123,-256 + stf.spill.nta [loc1]=f115,-256 + ;; + stf.spill.nta [loc0]=f107,-256 + stf.spill.nta [loc1]=f99,-256 + ;; + stf.spill.nta [loc0]=f91,-256 + stf.spill.nta [loc1]=f83,-256 + ;; + stf.spill.nta [loc0]=f75,-256 + stf.spill.nta [loc1]=f67,-256 + ;; + stf.spill.nta [loc0]=f59,-256 + stf.spill.nta [loc1]=f51,-256 + adds loc2=96*16-96,in0 + ;; + stf.spill.nta [loc0]=f43,-256 + stf.spill.nta [loc1]=f35,-256 + adds loc3=96*16-96-128,in0 + ;; + stf.spill.nta [loc2]=f122,-256 + stf.spill.nta [loc3]=f114,-256 + ;; + stf.spill.nta [loc2]=f106,-256 + stf.spill.nta [loc3]=f98,-256 + ;; + stf.spill.nta [loc2]=f90,-256 + stf.spill.nta [loc3]=f82,-256 + ;; + stf.spill.nta [loc2]=f74,-256 + stf.spill.nta [loc3]=f66,-256 + ;; + stf.spill.nta [loc2]=f58,-256 + stf.spill.nta [loc3]=f50,-256 + adds loc0=96*16-112,in0 + ;; + stf.spill.nta [loc2]=f42,-256 + stf.spill.nta [loc3]=f34,-256 + adds loc1=96*16-112-128,in0 + ;; + stf.spill.nta [loc0]=f121,-256 + stf.spill.nta [loc1]=f113,-256 + ;; + stf.spill.nta [loc0]=f105,-256 + stf.spill.nta [loc1]=f97,-256 + ;; + stf.spill.nta [loc0]=f89,-256 + stf.spill.nta [loc1]=f81,-256 + ;; + stf.spill.nta [loc0]=f73,-256 + stf.spill.nta [loc1]=f65,-256 + ;; + stf.spill.nta [loc0]=f57,-256 + stf.spill.nta [loc1]=f49,-256 + adds loc2=96*16-128,in0 + ;; + stf.spill.nta [loc0]=f41,-256 + stf.spill.nta [loc1]=f33,-256 + adds loc3=96*16-128-128,in0 + ;; + stf.spill.nta [loc2]=f120,-256 + stf.spill.nta [loc3]=f112,-256 + ;; + stf.spill.nta [loc2]=f104,-256 + stf.spill.nta [loc3]=f96,-256 + ;; + stf.spill.nta [loc2]=f88,-256 + stf.spill.nta [loc3]=f80,-256 + ;; + stf.spill.nta [loc2]=f72,-256 + stf.spill.nta [loc3]=f64,-256 + ;; + stf.spill.nta [loc2]=f56,-256 + stf.spill.nta [loc3]=f48,-256 + ;; + stf.spill.nta [loc2]=f40 + stf.spill.nta [loc3]=f32 + br.ret.sptk.many rp +END(__ia64_save_fpu) + +GLOBAL_ENTRY(__ia64_load_fpu) + alloc r2=ar.pfs,1,2,0,0 + adds r3=128,in0 + adds r14=256,in0 + adds r15=384,in0 + mov loc0=512 + mov loc1=-1024+16 + ;; + ldf.fill.nta f32=[in0],loc0 + ldf.fill.nta f40=[ r3],loc0 + ldf.fill.nta f48=[r14],loc0 + ldf.fill.nta f56=[r15],loc0 + ;; + ldf.fill.nta f64=[in0],loc0 + ldf.fill.nta f72=[ r3],loc0 + ldf.fill.nta f80=[r14],loc0 + ldf.fill.nta f88=[r15],loc0 + ;; + ldf.fill.nta f96=[in0],loc1 + ldf.fill.nta f104=[ r3],loc1 + ldf.fill.nta f112=[r14],loc1 + ldf.fill.nta f120=[r15],loc1 + ;; + ldf.fill.nta f33=[in0],loc0 + ldf.fill.nta f41=[ r3],loc0 + ldf.fill.nta f49=[r14],loc0 + ldf.fill.nta f57=[r15],loc0 + ;; + ldf.fill.nta f65=[in0],loc0 + ldf.fill.nta f73=[ r3],loc0 + ldf.fill.nta f81=[r14],loc0 + ldf.fill.nta f89=[r15],loc0 + ;; + ldf.fill.nta f97=[in0],loc1 + ldf.fill.nta f105=[ r3],loc1 + ldf.fill.nta f113=[r14],loc1 + ldf.fill.nta f121=[r15],loc1 + ;; + ldf.fill.nta f34=[in0],loc0 + ldf.fill.nta f42=[ r3],loc0 + ldf.fill.nta f50=[r14],loc0 + ldf.fill.nta f58=[r15],loc0 + ;; + ldf.fill.nta f66=[in0],loc0 + ldf.fill.nta f74=[ r3],loc0 + ldf.fill.nta f82=[r14],loc0 + ldf.fill.nta f90=[r15],loc0 + ;; + ldf.fill.nta f98=[in0],loc1 + ldf.fill.nta f106=[ r3],loc1 + ldf.fill.nta f114=[r14],loc1 + ldf.fill.nta f122=[r15],loc1 + ;; + ldf.fill.nta f35=[in0],loc0 + ldf.fill.nta f43=[ r3],loc0 + ldf.fill.nta f51=[r14],loc0 + ldf.fill.nta f59=[r15],loc0 + ;; + ldf.fill.nta f67=[in0],loc0 + ldf.fill.nta f75=[ r3],loc0 + ldf.fill.nta f83=[r14],loc0 + ldf.fill.nta f91=[r15],loc0 + ;; + ldf.fill.nta f99=[in0],loc1 + ldf.fill.nta f107=[ r3],loc1 + ldf.fill.nta f115=[r14],loc1 + ldf.fill.nta f123=[r15],loc1 + ;; + ldf.fill.nta f36=[in0],loc0 + ldf.fill.nta f44=[ r3],loc0 + ldf.fill.nta f52=[r14],loc0 + ldf.fill.nta f60=[r15],loc0 + ;; + ldf.fill.nta f68=[in0],loc0 + ldf.fill.nta f76=[ r3],loc0 + ldf.fill.nta f84=[r14],loc0 + ldf.fill.nta f92=[r15],loc0 + ;; + ldf.fill.nta f100=[in0],loc1 + ldf.fill.nta f108=[ r3],loc1 + ldf.fill.nta f116=[r14],loc1 + ldf.fill.nta f124=[r15],loc1 + ;; + ldf.fill.nta f37=[in0],loc0 + ldf.fill.nta f45=[ r3],loc0 + ldf.fill.nta f53=[r14],loc0 + ldf.fill.nta f61=[r15],loc0 + ;; + ldf.fill.nta f69=[in0],loc0 + ldf.fill.nta f77=[ r3],loc0 + ldf.fill.nta f85=[r14],loc0 + ldf.fill.nta f93=[r15],loc0 + ;; + ldf.fill.nta f101=[in0],loc1 + ldf.fill.nta f109=[ r3],loc1 + ldf.fill.nta f117=[r14],loc1 + ldf.fill.nta f125=[r15],loc1 + ;; + ldf.fill.nta f38 =[in0],loc0 + ldf.fill.nta f46 =[ r3],loc0 + ldf.fill.nta f54 =[r14],loc0 + ldf.fill.nta f62 =[r15],loc0 + ;; + ldf.fill.nta f70 =[in0],loc0 + ldf.fill.nta f78 =[ r3],loc0 + ldf.fill.nta f86 =[r14],loc0 + ldf.fill.nta f94 =[r15],loc0 + ;; + ldf.fill.nta f102=[in0],loc1 + ldf.fill.nta f110=[ r3],loc1 + ldf.fill.nta f118=[r14],loc1 + ldf.fill.nta f126=[r15],loc1 + ;; + ldf.fill.nta f39 =[in0],loc0 + ldf.fill.nta f47 =[ r3],loc0 + ldf.fill.nta f55 =[r14],loc0 + ldf.fill.nta f63 =[r15],loc0 + ;; + ldf.fill.nta f71 =[in0],loc0 + ldf.fill.nta f79 =[ r3],loc0 + ldf.fill.nta f87 =[r14],loc0 + ldf.fill.nta f95 =[r15],loc0 + ;; + ldf.fill.nta f103=[in0] + ldf.fill.nta f111=[ r3] + ldf.fill.nta f119=[r14] + ldf.fill.nta f127=[r15] + br.ret.sptk.many rp +END(__ia64_load_fpu) + +GLOBAL_ENTRY(__ia64_init_fpu) + stf.spill [sp]=f0 // M3 + mov f32=f0 // F + nop.b 0 + + ldfps f33,f34=[sp] // M0 + ldfps f35,f36=[sp] // M1 + mov f37=f0 // F + ;; + + setf.s f38=r0 // M2 + setf.s f39=r0 // M3 + mov f40=f0 // F + + ldfps f41,f42=[sp] // M0 + ldfps f43,f44=[sp] // M1 + mov f45=f0 // F + + setf.s f46=r0 // M2 + setf.s f47=r0 // M3 + mov f48=f0 // F + + ldfps f49,f50=[sp] // M0 + ldfps f51,f52=[sp] // M1 + mov f53=f0 // F + + setf.s f54=r0 // M2 + setf.s f55=r0 // M3 + mov f56=f0 // F + + ldfps f57,f58=[sp] // M0 + ldfps f59,f60=[sp] // M1 + mov f61=f0 // F + + setf.s f62=r0 // M2 + setf.s f63=r0 // M3 + mov f64=f0 // F + + ldfps f65,f66=[sp] // M0 + ldfps f67,f68=[sp] // M1 + mov f69=f0 // F + + setf.s f70=r0 // M2 + setf.s f71=r0 // M3 + mov f72=f0 // F + + ldfps f73,f74=[sp] // M0 + ldfps f75,f76=[sp] // M1 + mov f77=f0 // F + + setf.s f78=r0 // M2 + setf.s f79=r0 // M3 + mov f80=f0 // F + + ldfps f81,f82=[sp] // M0 + ldfps f83,f84=[sp] // M1 + mov f85=f0 // F + + setf.s f86=r0 // M2 + setf.s f87=r0 // M3 + mov f88=f0 // F + + /* + * When the instructions are cached, it would be faster to initialize + * the remaining registers with simply mov instructions (F-unit). + * This gets the time down to ~29 cycles. However, this would use up + * 33 bundles, whereas continuing with the above pattern yields + * 10 bundles and ~30 cycles. + */ + + ldfps f89,f90=[sp] // M0 + ldfps f91,f92=[sp] // M1 + mov f93=f0 // F + + setf.s f94=r0 // M2 + setf.s f95=r0 // M3 + mov f96=f0 // F + + ldfps f97,f98=[sp] // M0 + ldfps f99,f100=[sp] // M1 + mov f101=f0 // F + + setf.s f102=r0 // M2 + setf.s f103=r0 // M3 + mov f104=f0 // F + + ldfps f105,f106=[sp] // M0 + ldfps f107,f108=[sp] // M1 + mov f109=f0 // F + + setf.s f110=r0 // M2 + setf.s f111=r0 // M3 + mov f112=f0 // F + + ldfps f113,f114=[sp] // M0 + ldfps f115,f116=[sp] // M1 + mov f117=f0 // F + + setf.s f118=r0 // M2 + setf.s f119=r0 // M3 + mov f120=f0 // F + + ldfps f121,f122=[sp] // M0 + ldfps f123,f124=[sp] // M1 + mov f125=f0 // F + + setf.s f126=r0 // M2 + setf.s f127=r0 // M3 + br.ret.sptk.many rp // F +END(__ia64_init_fpu) + +/* + * Switch execution mode from virtual to physical + * + * Inputs: + * r16 = new psr to establish + * Output: + * r19 = old virtual address of ar.bsp + * r20 = old virtual address of sp + * + * Note: RSE must already be in enforced lazy mode + */ +GLOBAL_ENTRY(ia64_switch_mode_phys) + { + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-ia64_switch_mode_phys,r15 + + mov r19=ar.bsp + mov r20=sp + mov r14=rp // get return address into a general register + ;; + + // going to physical mode, use tpa to translate virt->phys + tpa r17=r19 + tpa r3=r3 + tpa sp=sp + tpa r14=r14 + ;; + + mov r18=ar.rnat // save ar.rnat + mov ar.bspstore=r17 // this steps on ar.rnat + mov cr.iip=r3 + mov cr.ifs=r0 + ;; + mov ar.rnat=r18 // restore ar.rnat + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.many rp +END(ia64_switch_mode_phys) + +/* + * Switch execution mode from physical to virtual + * + * Inputs: + * r16 = new psr to establish + * r19 = new bspstore to establish + * r20 = new sp to establish + * + * Note: RSE must already be in enforced lazy mode + */ +GLOBAL_ENTRY(ia64_switch_mode_virt) + { + rsm psr.i | psr.ic // disable interrupts and interrupt collection + mov r15=ip + } + ;; + { + flushrs // must be first insn in group + srlz.i + } + ;; + mov cr.ipsr=r16 // set new PSR + add r3=1f-ia64_switch_mode_virt,r15 + + mov r14=rp // get return address into a general register + ;; + + // going to virtual + // - for code addresses, set upper bits of addr to KERNEL_START + // - for stack addresses, copy from input argument + movl r18=KERNEL_START + dep r3=0,r3,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT + dep r14=0,r14,KERNEL_TR_PAGE_SHIFT,64-KERNEL_TR_PAGE_SHIFT + mov sp=r20 + ;; + or r3=r3,r18 + or r14=r14,r18 + ;; + + mov r18=ar.rnat // save ar.rnat + mov ar.bspstore=r19 // this steps on ar.rnat + mov cr.iip=r3 + mov cr.ifs=r0 + ;; + mov ar.rnat=r18 // restore ar.rnat + rfi // must be last insn in group + ;; +1: mov rp=r14 + br.ret.sptk.many rp +END(ia64_switch_mode_virt) + +GLOBAL_ENTRY(ia64_delay_loop) + .prologue +{ nop 0 // work around GAS unwind info generation bug... + .save ar.lc,r2 + mov r2=ar.lc + .body + ;; + mov ar.lc=r32 +} + ;; + // force loop to be 32-byte aligned (GAS bug means we cannot use .align + // inside function body without corrupting unwind info). +{ nop 0 } +1: br.cloop.sptk.few 1b + ;; + mov ar.lc=r2 + br.ret.sptk.many rp +END(ia64_delay_loop) + +/* + * Return a CPU-local timestamp in nano-seconds. This timestamp is + * NOT synchronized across CPUs its return value must never be + * compared against the values returned on another CPU. The usage in + * kernel/sched/core.c ensures that. + * + * The return-value of sched_clock() is NOT supposed to wrap-around. + * If it did, it would cause some scheduling hiccups (at the worst). + * Fortunately, with a 64-bit cycle-counter ticking at 100GHz, even + * that would happen only once every 5+ years. + * + * The code below basically calculates: + * + * (ia64_get_itc() * local_cpu_data->nsec_per_cyc) >> IA64_NSEC_PER_CYC_SHIFT + * + * except that the multiplication and the shift are done with 128-bit + * intermediate precision so that we can produce a full 64-bit result. + */ +GLOBAL_ENTRY(ia64_native_sched_clock) + addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + mov.m r9=ar.itc // fetch cycle-counter (35 cyc) + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r9 // certain to stall, so issue it _after_ ldf8... + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(ia64_native_sched_clock) + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +GLOBAL_ENTRY(cycle_to_nsec) + alloc r16=ar.pfs,1,0,0,0 + addl r8=THIS_CPU(ia64_cpu_info) + IA64_CPUINFO_NSEC_PER_CYC_OFFSET,r0 + ;; + ldf8 f8=[r8] + ;; + setf.sig f9=r32 + ;; + xmpy.lu f10=f9,f8 // calculate low 64 bits of 128-bit product (4 cyc) + xmpy.hu f11=f9,f8 // calculate high 64 bits of 128-bit product + ;; + getf.sig r8=f10 // (5 cyc) + getf.sig r9=f11 + ;; + shrp r8=r9,r8,IA64_NSEC_PER_CYC_SHIFT + br.ret.sptk.many rp +END(cycle_to_nsec) +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +#ifdef CONFIG_IA64_BRL_EMU + +/* + * Assembly routines used by brl_emu.c to set preserved register state. + */ + +#define SET_REG(reg) \ + GLOBAL_ENTRY(ia64_set_##reg); \ + alloc r16=ar.pfs,1,0,0,0; \ + mov reg=r32; \ + ;; \ + br.ret.sptk.many rp; \ + END(ia64_set_##reg) + +SET_REG(b1); +SET_REG(b2); +SET_REG(b3); +SET_REG(b4); +SET_REG(b5); + +#endif /* CONFIG_IA64_BRL_EMU */ + +#ifdef CONFIG_SMP + +#ifdef CONFIG_HOTPLUG_CPU +GLOBAL_ENTRY(ia64_jump_to_sal) + alloc r16=ar.pfs,1,0,0,0;; + rsm psr.i | psr.ic +{ + flushrs + srlz.i +} + tpa r25=in0 + movl r18=tlb_purge_done;; + DATA_VA_TO_PA(r18);; + mov b1=r18 // Return location + movl r18=ia64_do_tlb_purge;; + DATA_VA_TO_PA(r18);; + mov b2=r18 // doing tlb_flush work + mov ar.rsc=0 // Put RSE in enforced lazy, LE mode + movl r17=1f;; + DATA_VA_TO_PA(r17);; + mov cr.iip=r17 + movl r16=SAL_PSR_BITS_TO_SET;; + mov cr.ipsr=r16 + mov cr.ifs=r0;; + rfi;; // note: this unmask MCA/INIT (psr.mc) +1: + /* + * Invalidate all TLB data/inst + */ + br.sptk.many b2;; // jump to tlb purge code + +tlb_purge_done: + RESTORE_REGION_REGS(r25, r17,r18,r19);; + RESTORE_REG(b0, r25, r17);; + RESTORE_REG(b1, r25, r17);; + RESTORE_REG(b2, r25, r17);; + RESTORE_REG(b3, r25, r17);; + RESTORE_REG(b4, r25, r17);; + RESTORE_REG(b5, r25, r17);; + ld8 r1=[r25],0x08;; + ld8 r12=[r25],0x08;; + ld8 r13=[r25],0x08;; + RESTORE_REG(ar.fpsr, r25, r17);; + RESTORE_REG(ar.pfs, r25, r17);; + RESTORE_REG(ar.rnat, r25, r17);; + RESTORE_REG(ar.unat, r25, r17);; + RESTORE_REG(ar.bspstore, r25, r17);; + RESTORE_REG(cr.dcr, r25, r17);; + RESTORE_REG(cr.iva, r25, r17);; + RESTORE_REG(cr.pta, r25, r17);; + srlz.d;; // required not to violate RAW dependency + RESTORE_REG(cr.itv, r25, r17);; + RESTORE_REG(cr.pmv, r25, r17);; + RESTORE_REG(cr.cmcv, r25, r17);; + RESTORE_REG(cr.lrr0, r25, r17);; + RESTORE_REG(cr.lrr1, r25, r17);; + ld8 r4=[r25],0x08;; + ld8 r5=[r25],0x08;; + ld8 r6=[r25],0x08;; + ld8 r7=[r25],0x08;; + ld8 r17=[r25],0x08;; + mov pr=r17,-1;; + RESTORE_REG(ar.lc, r25, r17);; + /* + * Now Restore floating point regs + */ + ldf.fill.nta f2=[r25],16;; + ldf.fill.nta f3=[r25],16;; + ldf.fill.nta f4=[r25],16;; + ldf.fill.nta f5=[r25],16;; + ldf.fill.nta f16=[r25],16;; + ldf.fill.nta f17=[r25],16;; + ldf.fill.nta f18=[r25],16;; + ldf.fill.nta f19=[r25],16;; + ldf.fill.nta f20=[r25],16;; + ldf.fill.nta f21=[r25],16;; + ldf.fill.nta f22=[r25],16;; + ldf.fill.nta f23=[r25],16;; + ldf.fill.nta f24=[r25],16;; + ldf.fill.nta f25=[r25],16;; + ldf.fill.nta f26=[r25],16;; + ldf.fill.nta f27=[r25],16;; + ldf.fill.nta f28=[r25],16;; + ldf.fill.nta f29=[r25],16;; + ldf.fill.nta f30=[r25],16;; + ldf.fill.nta f31=[r25],16;; + + /* + * Now that we have done all the register restores + * we are now ready for the big DIVE to SAL Land + */ + ssm psr.ic;; + srlz.d;; + br.ret.sptk.many b0;; +END(ia64_jump_to_sal) +#endif /* CONFIG_HOTPLUG_CPU */ + +#endif /* CONFIG_SMP */ diff --git a/arch/ia64/kernel/iosapic.c b/arch/ia64/kernel/iosapic.c new file mode 100644 index 000000000000..99300850abc1 --- /dev/null +++ b/arch/ia64/kernel/iosapic.c @@ -0,0 +1,1137 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * I/O SAPIC support. + * + * Copyright (C) 1999 Intel Corp. + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 2000-2002 J.I. Lee + * Copyright (C) 1999-2000, 2002-2003 Hewlett-Packard Co. + * David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999,2000 Walt Drummond + * + * 00/04/19 D. Mosberger Rewritten to mirror more closely the x86 I/O + * APIC code. In particular, we now have separate + * handlers for edge and level triggered + * interrupts. + * 00/10/27 Asit Mallick, Goutham Rao IRQ vector + * allocation PCI to vector mapping, shared PCI + * interrupts. + * 00/10/27 D. Mosberger Document things a bit more to make them more + * understandable. Clean up much of the old + * IOSAPIC cruft. + * 01/07/27 J.I. Lee PCI irq routing, Platform/Legacy interrupts + * and fixes for ACPI S5(SoftOff) support. + * 02/01/23 J.I. Lee iosapic pgm fixes for PCI irq routing from _PRT + * 02/01/07 E. Focht Redirectable interrupt + * vectors in iosapic_set_affinity(), + * initializations for /proc/irq/#/smp_affinity + * 02/04/02 P. Diefenbaugh Cleaned up ACPI PCI IRQ routing. + * 02/04/18 J.I. Lee bug fix in iosapic_init_pci_irq + * 02/04/30 J.I. Lee bug fix in find_iosapic to fix ACPI PCI IRQ to + * IOSAPIC mapping error + * 02/07/29 T. Kochi Allocate interrupt vectors dynamically + * 02/08/04 T. Kochi Cleaned up terminology (irq, global system + * interrupt, vector, etc.) + * 02/09/20 D. Mosberger Simplified by taking advantage of ACPI's + * pci_irq code. + * 03/02/19 B. Helgaas Make pcat_compat system-wide, not per-IOSAPIC. + * Remove iosapic_address & gsi_base from + * external interfaces. Rationalize + * __init/__devinit attributes. + * 04/12/04 Ashok Raj Intel Corporation 2004 + * Updated to work with irq migration necessary + * for CPU Hotplug + */ +/* + * Here is what the interrupt logic between a PCI device and the kernel looks + * like: + * + * (1) A PCI device raises one of the four interrupt pins (INTA, INTB, INTC, + * INTD). The device is uniquely identified by its bus-, and slot-number + * (the function number does not matter here because all functions share + * the same interrupt lines). + * + * (2) The motherboard routes the interrupt line to a pin on a IOSAPIC + * controller. Multiple interrupt lines may have to share the same + * IOSAPIC pin (if they're level triggered and use the same polarity). + * Each interrupt line has a unique Global System Interrupt (GSI) number + * which can be calculated as the sum of the controller's base GSI number + * and the IOSAPIC pin number to which the line connects. + * + * (3) The IOSAPIC uses an internal routing table entries (RTEs) to map the + * IOSAPIC pin into the IA-64 interrupt vector. This interrupt vector is then + * sent to the CPU. + * + * (4) The kernel recognizes an interrupt as an IRQ. The IRQ interface is + * used as architecture-independent interrupt handling mechanism in Linux. + * As an IRQ is a number, we have to have + * IA-64 interrupt vector number <-> IRQ number mapping. On smaller + * systems, we use one-to-one mapping between IA-64 vector and IRQ. + * + * To sum up, there are three levels of mappings involved: + * + * PCI pin -> global system interrupt (GSI) -> IA-64 vector <-> IRQ + * + * Note: The term "IRQ" is loosely used everywhere in Linux kernel to + * describe interrupts. Now we use "IRQ" only for Linux IRQ's. ISA IRQ + * (isa_irq) is the only exception in this source code. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#undef DEBUG_INTERRUPT_ROUTING + +#ifdef DEBUG_INTERRUPT_ROUTING +#define DBG(fmt...) printk(fmt) +#else +#define DBG(fmt...) +#endif + +static DEFINE_SPINLOCK(iosapic_lock); + +/* + * These tables map IA-64 vectors to the IOSAPIC pin that generates this + * vector. + */ + +#define NO_REF_RTE 0 + +static struct iosapic { + char __iomem *addr; /* base address of IOSAPIC */ + unsigned int gsi_base; /* GSI base */ + unsigned short num_rte; /* # of RTEs on this IOSAPIC */ + int rtes_inuse; /* # of RTEs in use on this IOSAPIC */ +#ifdef CONFIG_NUMA + unsigned short node; /* numa node association via pxm */ +#endif + spinlock_t lock; /* lock for indirect reg access */ +} iosapic_lists[NR_IOSAPICS]; + +struct iosapic_rte_info { + struct list_head rte_list; /* RTEs sharing the same vector */ + char rte_index; /* IOSAPIC RTE index */ + int refcnt; /* reference counter */ + struct iosapic *iosapic; +} ____cacheline_aligned; + +static struct iosapic_intr_info { + struct list_head rtes; /* RTEs using this vector (empty => + * not an IOSAPIC interrupt) */ + int count; /* # of registered RTEs */ + u32 low32; /* current value of low word of + * Redirection table entry */ + unsigned int dest; /* destination CPU physical ID */ + unsigned char dmode : 3; /* delivery mode (see iosapic.h) */ + unsigned char polarity: 1; /* interrupt polarity + * (see iosapic.h) */ + unsigned char trigger : 1; /* trigger mode (see iosapic.h) */ +} iosapic_intr_info[NR_IRQS]; + +static unsigned char pcat_compat; /* 8259 compatibility flag */ + +static inline void +iosapic_write(struct iosapic *iosapic, unsigned int reg, u32 val) +{ + unsigned long flags; + + spin_lock_irqsave(&iosapic->lock, flags); + __iosapic_write(iosapic->addr, reg, val); + spin_unlock_irqrestore(&iosapic->lock, flags); +} + +/* + * Find an IOSAPIC associated with a GSI + */ +static inline int +find_iosapic (unsigned int gsi) +{ + int i; + + for (i = 0; i < NR_IOSAPICS; i++) { + if ((unsigned) (gsi - iosapic_lists[i].gsi_base) < + iosapic_lists[i].num_rte) + return i; + } + + return -1; +} + +static inline int __gsi_to_irq(unsigned int gsi) +{ + int irq; + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; + list_for_each_entry(rte, &info->rtes, rte_list) + if (rte->iosapic->gsi_base + rte->rte_index == gsi) + return irq; + } + return -1; +} + +int +gsi_to_irq (unsigned int gsi) +{ + unsigned long flags; + int irq; + + spin_lock_irqsave(&iosapic_lock, flags); + irq = __gsi_to_irq(gsi); + spin_unlock_irqrestore(&iosapic_lock, flags); + return irq; +} + +static struct iosapic_rte_info *find_rte(unsigned int irq, unsigned int gsi) +{ + struct iosapic_rte_info *rte; + + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + if (rte->iosapic->gsi_base + rte->rte_index == gsi) + return rte; + return NULL; +} + +static void +set_rte (unsigned int gsi, unsigned int irq, unsigned int dest, int mask) +{ + unsigned long pol, trigger, dmode; + u32 low32, high32; + int rte_index; + char redir; + struct iosapic_rte_info *rte; + ia64_vector vector = irq_to_vector(irq); + + DBG(KERN_DEBUG"IOSAPIC: routing vector %d to 0x%x\n", vector, dest); + + rte = find_rte(irq, gsi); + if (!rte) + return; /* not an IOSAPIC interrupt */ + + rte_index = rte->rte_index; + pol = iosapic_intr_info[irq].polarity; + trigger = iosapic_intr_info[irq].trigger; + dmode = iosapic_intr_info[irq].dmode; + + redir = (dmode == IOSAPIC_LOWEST_PRIORITY) ? 1 : 0; + +#ifdef CONFIG_SMP + set_irq_affinity_info(irq, (int)(dest & 0xffff), redir); +#endif + + low32 = ((pol << IOSAPIC_POLARITY_SHIFT) | + (trigger << IOSAPIC_TRIGGER_SHIFT) | + (dmode << IOSAPIC_DELIVERY_SHIFT) | + ((mask ? 1 : 0) << IOSAPIC_MASK_SHIFT) | + vector); + + /* dest contains both id and eid */ + high32 = (dest << IOSAPIC_DEST_SHIFT); + + iosapic_write(rte->iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; +} + +static void +iosapic_nop (struct irq_data *data) +{ + /* do nothing... */ +} + + +#ifdef CONFIG_KEXEC +void +kexec_disable_iosapic(void) +{ + struct iosapic_intr_info *info; + struct iosapic_rte_info *rte; + ia64_vector vec; + int irq; + + for (irq = 0; irq < NR_IRQS; irq++) { + info = &iosapic_intr_info[irq]; + vec = irq_to_vector(irq); + list_for_each_entry(rte, &info->rtes, + rte_list) { + iosapic_write(rte->iosapic, + IOSAPIC_RTE_LOW(rte->rte_index), + IOSAPIC_MASK|vec); + iosapic_eoi(rte->iosapic->addr, vec); + } + } +} +#endif + +static void +mask_irq (struct irq_data *data) +{ + unsigned int irq = data->irq; + u32 low32; + int rte_index; + struct iosapic_rte_info *rte; + + if (!iosapic_intr_info[irq].count) + return; /* not an IOSAPIC interrupt! */ + + /* set only the mask bit */ + low32 = iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + } +} + +static void +unmask_irq (struct irq_data *data) +{ + unsigned int irq = data->irq; + u32 low32; + int rte_index; + struct iosapic_rte_info *rte; + + if (!iosapic_intr_info[irq].count) + return; /* not an IOSAPIC interrupt! */ + + low32 = iosapic_intr_info[irq].low32 &= ~IOSAPIC_MASK; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + rte_index = rte->rte_index; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + } +} + + +static int +iosapic_set_affinity(struct irq_data *data, const struct cpumask *mask, + bool force) +{ +#ifdef CONFIG_SMP + unsigned int irq = data->irq; + u32 high32, low32; + int cpu, dest, rte_index; + int redir = (irq & IA64_IRQ_REDIRECTED) ? 1 : 0; + struct iosapic_rte_info *rte; + struct iosapic *iosapic; + + irq &= (~IA64_IRQ_REDIRECTED); + + cpu = cpumask_first_and(cpu_online_mask, mask); + if (cpu >= nr_cpu_ids) + return -1; + + if (irq_prepare_move(irq, cpu)) + return -1; + + dest = cpu_physical_id(cpu); + + if (!iosapic_intr_info[irq].count) + return -1; /* not an IOSAPIC interrupt */ + + set_irq_affinity_info(irq, dest, redir); + + /* dest contains both id and eid */ + high32 = dest << IOSAPIC_DEST_SHIFT; + + low32 = iosapic_intr_info[irq].low32 & ~(7 << IOSAPIC_DELIVERY_SHIFT); + if (redir) + /* change delivery mode to lowest priority */ + low32 |= (IOSAPIC_LOWEST_PRIORITY << IOSAPIC_DELIVERY_SHIFT); + else + /* change delivery mode to fixed */ + low32 |= (IOSAPIC_FIXED << IOSAPIC_DELIVERY_SHIFT); + low32 &= IOSAPIC_VECTOR_MASK; + low32 |= irq_to_vector(irq); + + iosapic_intr_info[irq].low32 = low32; + iosapic_intr_info[irq].dest = dest; + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) { + iosapic = rte->iosapic; + rte_index = rte->rte_index; + iosapic_write(iosapic, IOSAPIC_RTE_HIGH(rte_index), high32); + iosapic_write(iosapic, IOSAPIC_RTE_LOW(rte_index), low32); + } + +#endif + return 0; +} + +/* + * Handlers for level-triggered interrupts. + */ + +static unsigned int +iosapic_startup_level_irq (struct irq_data *data) +{ + unmask_irq(data); + return 0; +} + +static void +iosapic_unmask_level_irq (struct irq_data *data) +{ + unsigned int irq = data->irq; + ia64_vector vec = irq_to_vector(irq); + struct iosapic_rte_info *rte; + int do_unmask_irq = 0; + + irq_complete_move(irq); + if (unlikely(irqd_is_setaffinity_pending(data))) { + do_unmask_irq = 1; + mask_irq(data); + } else + unmask_irq(data); + + list_for_each_entry(rte, &iosapic_intr_info[irq].rtes, rte_list) + iosapic_eoi(rte->iosapic->addr, vec); + + if (unlikely(do_unmask_irq)) { + irq_move_masked_irq(data); + unmask_irq(data); + } +} + +#define iosapic_shutdown_level_irq mask_irq +#define iosapic_enable_level_irq unmask_irq +#define iosapic_disable_level_irq mask_irq +#define iosapic_ack_level_irq iosapic_nop + +static struct irq_chip irq_type_iosapic_level = { + .name = "IO-SAPIC-level", + .irq_startup = iosapic_startup_level_irq, + .irq_shutdown = iosapic_shutdown_level_irq, + .irq_enable = iosapic_enable_level_irq, + .irq_disable = iosapic_disable_level_irq, + .irq_ack = iosapic_ack_level_irq, + .irq_mask = mask_irq, + .irq_unmask = iosapic_unmask_level_irq, + .irq_set_affinity = iosapic_set_affinity +}; + +/* + * Handlers for edge-triggered interrupts. + */ + +static unsigned int +iosapic_startup_edge_irq (struct irq_data *data) +{ + unmask_irq(data); + /* + * IOSAPIC simply drops interrupts pended while the + * corresponding pin was masked, so we can't know if an + * interrupt is pending already. Let's hope not... + */ + return 0; +} + +static void +iosapic_ack_edge_irq (struct irq_data *data) +{ + irq_complete_move(data->irq); + irq_move_irq(data); +} + +#define iosapic_enable_edge_irq unmask_irq +#define iosapic_disable_edge_irq iosapic_nop + +static struct irq_chip irq_type_iosapic_edge = { + .name = "IO-SAPIC-edge", + .irq_startup = iosapic_startup_edge_irq, + .irq_shutdown = iosapic_disable_edge_irq, + .irq_enable = iosapic_enable_edge_irq, + .irq_disable = iosapic_disable_edge_irq, + .irq_ack = iosapic_ack_edge_irq, + .irq_mask = mask_irq, + .irq_unmask = unmask_irq, + .irq_set_affinity = iosapic_set_affinity +}; + +static unsigned int +iosapic_version (char __iomem *addr) +{ + /* + * IOSAPIC Version Register return 32 bit structure like: + * { + * unsigned int version : 8; + * unsigned int reserved1 : 8; + * unsigned int max_redir : 8; + * unsigned int reserved2 : 8; + * } + */ + return __iosapic_read(addr, IOSAPIC_VERSION); +} + +static int iosapic_find_sharable_irq(unsigned long trigger, unsigned long pol) +{ + int i, irq = -ENOSPC, min_count = -1; + struct iosapic_intr_info *info; + + /* + * shared vectors for edge-triggered interrupts are not + * supported yet + */ + if (trigger == IOSAPIC_EDGE) + return -EINVAL; + + for (i = 0; i < NR_IRQS; i++) { + info = &iosapic_intr_info[i]; + if (info->trigger == trigger && info->polarity == pol && + (info->dmode == IOSAPIC_FIXED || + info->dmode == IOSAPIC_LOWEST_PRIORITY) && + can_request_irq(i, IRQF_SHARED)) { + if (min_count == -1 || info->count < min_count) { + irq = i; + min_count = info->count; + } + } + } + return irq; +} + +/* + * if the given vector is already owned by other, + * assign a new vector for the other and make the vector available + */ +static void __init +iosapic_reassign_vector (int irq) +{ + int new_irq; + + if (iosapic_intr_info[irq].count) { + new_irq = create_irq(); + if (new_irq < 0) + panic("%s: out of interrupt vectors!\n", __func__); + printk(KERN_INFO "Reassigning vector %d to %d\n", + irq_to_vector(irq), irq_to_vector(new_irq)); + memcpy(&iosapic_intr_info[new_irq], &iosapic_intr_info[irq], + sizeof(struct iosapic_intr_info)); + INIT_LIST_HEAD(&iosapic_intr_info[new_irq].rtes); + list_move(iosapic_intr_info[irq].rtes.next, + &iosapic_intr_info[new_irq].rtes); + memset(&iosapic_intr_info[irq], 0, + sizeof(struct iosapic_intr_info)); + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + } +} + +static inline int irq_is_shared (int irq) +{ + return (iosapic_intr_info[irq].count > 1); +} + +struct irq_chip* +ia64_native_iosapic_get_irq_chip(unsigned long trigger) +{ + if (trigger == IOSAPIC_EDGE) + return &irq_type_iosapic_edge; + else + return &irq_type_iosapic_level; +} + +static int +register_intr (unsigned int gsi, int irq, unsigned char delivery, + unsigned long polarity, unsigned long trigger) +{ + struct irq_chip *chip, *irq_type; + int index; + struct iosapic_rte_info *rte; + + index = find_iosapic(gsi); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __func__, gsi); + return -ENODEV; + } + + rte = find_rte(irq, gsi); + if (!rte) { + rte = kzalloc(sizeof (*rte), GFP_ATOMIC); + if (!rte) { + printk(KERN_WARNING "%s: cannot allocate memory\n", + __func__); + return -ENOMEM; + } + + rte->iosapic = &iosapic_lists[index]; + rte->rte_index = gsi - rte->iosapic->gsi_base; + rte->refcnt++; + list_add_tail(&rte->rte_list, &iosapic_intr_info[irq].rtes); + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; + } + else if (rte->refcnt == NO_REF_RTE) { + struct iosapic_intr_info *info = &iosapic_intr_info[irq]; + if (info->count > 0 && + (info->trigger != trigger || info->polarity != polarity)){ + printk (KERN_WARNING + "%s: cannot override the interrupt\n", + __func__); + return -EINVAL; + } + rte->refcnt++; + iosapic_intr_info[irq].count++; + iosapic_lists[index].rtes_inuse++; + } + + iosapic_intr_info[irq].polarity = polarity; + iosapic_intr_info[irq].dmode = delivery; + iosapic_intr_info[irq].trigger = trigger; + + irq_type = iosapic_get_irq_chip(trigger); + + chip = irq_get_chip(irq); + if (irq_type != NULL && chip != irq_type) { + if (chip != &no_irq_chip) + printk(KERN_WARNING + "%s: changing vector %d from %s to %s\n", + __func__, irq_to_vector(irq), + chip->name, irq_type->name); + chip = irq_type; + } + irq_set_chip_handler_name_locked(irq_get_irq_data(irq), chip, + trigger == IOSAPIC_EDGE ? handle_edge_irq : handle_level_irq, + NULL); + return 0; +} + +static unsigned int +get_target_cpu (unsigned int gsi, int irq) +{ +#ifdef CONFIG_SMP + static int cpu = -1; + extern int cpe_vector; + cpumask_t domain = irq_to_domain(irq); + + /* + * In case of vector shared by multiple RTEs, all RTEs that + * share the vector need to use the same destination CPU. + */ + if (iosapic_intr_info[irq].count) + return iosapic_intr_info[irq].dest; + + /* + * If the platform supports redirection via XTP, let it + * distribute interrupts. + */ + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + return cpu_physical_id(smp_processor_id()); + + /* + * Some interrupts (ACPI SCI, for instance) are registered + * before the BSP is marked as online. + */ + if (!cpu_online(smp_processor_id())) + return cpu_physical_id(smp_processor_id()); + + if (cpe_vector > 0 && irq_to_vector(irq) == IA64_CPEP_VECTOR) + return get_cpei_target_cpu(); + +#ifdef CONFIG_NUMA + { + int num_cpus, cpu_index, iosapic_index, numa_cpu, i = 0; + const struct cpumask *cpu_mask; + + iosapic_index = find_iosapic(gsi); + if (iosapic_index < 0 || + iosapic_lists[iosapic_index].node == MAX_NUMNODES) + goto skip_numa_setup; + + cpu_mask = cpumask_of_node(iosapic_lists[iosapic_index].node); + num_cpus = 0; + for_each_cpu_and(numa_cpu, cpu_mask, &domain) { + if (cpu_online(numa_cpu)) + num_cpus++; + } + + if (!num_cpus) + goto skip_numa_setup; + + /* Use irq assignment to distribute across cpus in node */ + cpu_index = irq % num_cpus; + + for_each_cpu_and(numa_cpu, cpu_mask, &domain) + if (cpu_online(numa_cpu) && i++ >= cpu_index) + break; + + if (numa_cpu < nr_cpu_ids) + return cpu_physical_id(numa_cpu); + } +skip_numa_setup: +#endif + /* + * Otherwise, round-robin interrupt vectors across all the + * processors. (It'd be nice if we could be smarter in the + * case of NUMA.) + */ + do { + if (++cpu >= nr_cpu_ids) + cpu = 0; + } while (!cpu_online(cpu) || !cpumask_test_cpu(cpu, &domain)); + + return cpu_physical_id(cpu); +#else /* CONFIG_SMP */ + return cpu_physical_id(smp_processor_id()); +#endif +} + +static inline unsigned char choose_dmode(void) +{ +#ifdef CONFIG_SMP + if (smp_int_redirect & SMP_IRQ_REDIRECTION) + return IOSAPIC_LOWEST_PRIORITY; +#endif + return IOSAPIC_FIXED; +} + +/* + * ACPI can describe IOSAPIC interrupts via static tables and namespace + * methods. This provides an interface to register those interrupts and + * program the IOSAPIC RTE. + */ +int +iosapic_register_intr (unsigned int gsi, + unsigned long polarity, unsigned long trigger) +{ + int irq, mask = 1, err; + unsigned int dest; + unsigned long flags; + struct iosapic_rte_info *rte; + u32 low32; + unsigned char dmode; + struct irq_desc *desc; + + /* + * If this GSI has already been registered (i.e., it's a + * shared interrupt, or we lost a race to register it), + * don't touch the RTE. + */ + spin_lock_irqsave(&iosapic_lock, flags); + irq = __gsi_to_irq(gsi); + if (irq > 0) { + rte = find_rte(irq, gsi); + if(iosapic_intr_info[irq].count == 0) { + assign_irq_vector(irq); + irq_init_desc(irq); + } else if (rte->refcnt != NO_REF_RTE) { + rte->refcnt++; + goto unlock_iosapic_lock; + } + } else + irq = create_irq(); + + /* If vector is running out, we try to find a sharable vector */ + if (irq < 0) { + irq = iosapic_find_sharable_irq(trigger, polarity); + if (irq < 0) + goto unlock_iosapic_lock; + } + + desc = irq_to_desc(irq); + raw_spin_lock(&desc->lock); + dest = get_target_cpu(gsi, irq); + dmode = choose_dmode(); + err = register_intr(gsi, irq, dmode, polarity, trigger); + if (err < 0) { + raw_spin_unlock(&desc->lock); + irq = err; + goto unlock_iosapic_lock; + } + + /* + * If the vector is shared and already unmasked for other + * interrupt sources, don't mask it. + */ + low32 = iosapic_intr_info[irq].low32; + if (irq_is_shared(irq) && !(low32 & IOSAPIC_MASK)) + mask = 0; + set_rte(gsi, irq, dest, mask); + + printk(KERN_INFO "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, irq_to_vector(irq)); + + raw_spin_unlock(&desc->lock); + unlock_iosapic_lock: + spin_unlock_irqrestore(&iosapic_lock, flags); + return irq; +} + +void +iosapic_unregister_intr (unsigned int gsi) +{ + unsigned long flags; + int irq, index; + u32 low32; + unsigned long trigger, polarity; + unsigned int dest; + struct iosapic_rte_info *rte; + + /* + * If the irq associated with the gsi is not found, + * iosapic_unregister_intr() is unbalanced. We need to check + * this again after getting locks. + */ + irq = gsi_to_irq(gsi); + if (irq < 0) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); + WARN_ON(1); + return; + } + + spin_lock_irqsave(&iosapic_lock, flags); + if ((rte = find_rte(irq, gsi)) == NULL) { + printk(KERN_ERR "iosapic_unregister_intr(%u) unbalanced\n", + gsi); + WARN_ON(1); + goto out; + } + + if (--rte->refcnt > 0) + goto out; + + rte->refcnt = NO_REF_RTE; + + /* Mask the interrupt */ + low32 = iosapic_intr_info[irq].low32 | IOSAPIC_MASK; + iosapic_write(rte->iosapic, IOSAPIC_RTE_LOW(rte->rte_index), low32); + + iosapic_intr_info[irq].count--; + index = find_iosapic(gsi); + iosapic_lists[index].rtes_inuse--; + WARN_ON(iosapic_lists[index].rtes_inuse < 0); + + trigger = iosapic_intr_info[irq].trigger; + polarity = iosapic_intr_info[irq].polarity; + dest = iosapic_intr_info[irq].dest; + printk(KERN_INFO + "GSI %u (%s, %s) -> CPU %d (0x%04x) vector %d unregistered\n", + gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, irq_to_vector(irq)); + + if (iosapic_intr_info[irq].count == 0) { +#ifdef CONFIG_SMP + /* Clear affinity */ + irq_data_update_affinity(irq_get_irq_data(irq), cpu_all_mask); +#endif + /* Clear the interrupt information */ + iosapic_intr_info[irq].dest = 0; + iosapic_intr_info[irq].dmode = 0; + iosapic_intr_info[irq].polarity = 0; + iosapic_intr_info[irq].trigger = 0; + iosapic_intr_info[irq].low32 |= IOSAPIC_MASK; + + /* Destroy and reserve IRQ */ + destroy_and_reserve_irq(irq); + } + out: + spin_unlock_irqrestore(&iosapic_lock, flags); +} + +/* + * ACPI calls this when it finds an entry for a platform interrupt. + */ +int __init +iosapic_register_platform_intr (u32 int_type, unsigned int gsi, + int iosapic_vector, u16 eid, u16 id, + unsigned long polarity, unsigned long trigger) +{ + static const char * const name[] = {"unknown", "PMI", "INIT", "CPEI"}; + unsigned char delivery; + int irq, vector, mask = 0; + unsigned int dest = ((id << 8) | eid) & 0xffff; + + switch (int_type) { + case ACPI_INTERRUPT_PMI: + irq = vector = iosapic_vector; + bind_irq_vector(irq, vector, CPU_MASK_ALL); + /* + * since PMI vector is alloc'd by FW(ACPI) not by kernel, + * we need to make sure the vector is available + */ + iosapic_reassign_vector(irq); + delivery = IOSAPIC_PMI; + break; + case ACPI_INTERRUPT_INIT: + irq = create_irq(); + if (irq < 0) + panic("%s: out of interrupt vectors!\n", __func__); + vector = irq_to_vector(irq); + delivery = IOSAPIC_INIT; + break; + case ACPI_INTERRUPT_CPEI: + irq = vector = IA64_CPE_VECTOR; + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); + delivery = IOSAPIC_FIXED; + mask = 1; + break; + default: + printk(KERN_ERR "%s: invalid int type 0x%x\n", __func__, + int_type); + return -1; + } + + register_intr(gsi, irq, delivery, polarity, trigger); + + printk(KERN_INFO + "PLATFORM int %s (0x%x): GSI %u (%s, %s) -> CPU %d (0x%04x)" + " vector %d\n", + int_type < ARRAY_SIZE(name) ? name[int_type] : "unknown", + int_type, gsi, (trigger == IOSAPIC_EDGE ? "edge" : "level"), + (polarity == IOSAPIC_POL_HIGH ? "high" : "low"), + cpu_logical_id(dest), dest, vector); + + set_rte(gsi, irq, dest, mask); + return vector; +} + +/* + * ACPI calls this when it finds an entry for a legacy ISA IRQ override. + */ +void iosapic_override_isa_irq(unsigned int isa_irq, unsigned int gsi, + unsigned long polarity, unsigned long trigger) +{ + int vector, irq; + unsigned int dest = cpu_physical_id(smp_processor_id()); + unsigned char dmode; + + irq = vector = isa_irq_to_vector(isa_irq); + BUG_ON(bind_irq_vector(irq, vector, CPU_MASK_ALL)); + dmode = choose_dmode(); + register_intr(gsi, irq, dmode, polarity, trigger); + + DBG("ISA: IRQ %u -> GSI %u (%s,%s) -> CPU %d (0x%04x) vector %d\n", + isa_irq, gsi, trigger == IOSAPIC_EDGE ? "edge" : "level", + polarity == IOSAPIC_POL_HIGH ? "high" : "low", + cpu_logical_id(dest), dest, vector); + + set_rte(gsi, irq, dest, 1); +} + +void __init +ia64_native_iosapic_pcat_compat_init(void) +{ + if (pcat_compat) { + /* + * Disable the compatibility mode interrupts (8259 style), + * needs IN/OUT support enabled. + */ + printk(KERN_INFO + "%s: Disabling PC-AT compatible 8259 interrupts\n", + __func__); + outb(0xff, 0xA1); + outb(0xff, 0x21); + } +} + +void __init +iosapic_system_init (int system_pcat_compat) +{ + int irq; + + for (irq = 0; irq < NR_IRQS; ++irq) { + iosapic_intr_info[irq].low32 = IOSAPIC_MASK; + /* mark as unused */ + INIT_LIST_HEAD(&iosapic_intr_info[irq].rtes); + + iosapic_intr_info[irq].count = 0; + } + + pcat_compat = system_pcat_compat; + if (pcat_compat) + iosapic_pcat_compat_init(); +} + +static inline int +iosapic_alloc (void) +{ + int index; + + for (index = 0; index < NR_IOSAPICS; index++) + if (!iosapic_lists[index].addr) + return index; + + printk(KERN_WARNING "%s: failed to allocate iosapic\n", __func__); + return -1; +} + +static inline void +iosapic_free (int index) +{ + memset(&iosapic_lists[index], 0, sizeof(iosapic_lists[0])); +} + +static inline int +iosapic_check_gsi_range (unsigned int gsi_base, unsigned int ver) +{ + int index; + unsigned int gsi_end, base, end; + + /* check gsi range */ + gsi_end = gsi_base + ((ver >> 16) & 0xff); + for (index = 0; index < NR_IOSAPICS; index++) { + if (!iosapic_lists[index].addr) + continue; + + base = iosapic_lists[index].gsi_base; + end = base + iosapic_lists[index].num_rte - 1; + + if (gsi_end < base || end < gsi_base) + continue; /* OK */ + + return -EBUSY; + } + return 0; +} + +static int +iosapic_delete_rte(unsigned int irq, unsigned int gsi) +{ + struct iosapic_rte_info *rte, *temp; + + list_for_each_entry_safe(rte, temp, &iosapic_intr_info[irq].rtes, + rte_list) { + if (rte->iosapic->gsi_base + rte->rte_index == gsi) { + if (rte->refcnt) + return -EBUSY; + + list_del(&rte->rte_list); + kfree(rte); + return 0; + } + } + + return -EINVAL; +} + +int iosapic_init(unsigned long phys_addr, unsigned int gsi_base) +{ + int num_rte, err, index; + unsigned int isa_irq, ver; + char __iomem *addr; + unsigned long flags; + + spin_lock_irqsave(&iosapic_lock, flags); + index = find_iosapic(gsi_base); + if (index >= 0) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -EBUSY; + } + + addr = ioremap(phys_addr, 0); + if (addr == NULL) { + spin_unlock_irqrestore(&iosapic_lock, flags); + return -ENOMEM; + } + ver = iosapic_version(addr); + if ((err = iosapic_check_gsi_range(gsi_base, ver))) { + iounmap(addr); + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; + } + + /* + * The MAX_REDIR register holds the highest input pin number + * (starting from 0). We add 1 so that we can use it for + * number of pins (= RTEs) + */ + num_rte = ((ver >> 16) & 0xff) + 1; + + index = iosapic_alloc(); + iosapic_lists[index].addr = addr; + iosapic_lists[index].gsi_base = gsi_base; + iosapic_lists[index].num_rte = num_rte; +#ifdef CONFIG_NUMA + iosapic_lists[index].node = MAX_NUMNODES; +#endif + spin_lock_init(&iosapic_lists[index].lock); + spin_unlock_irqrestore(&iosapic_lock, flags); + + if ((gsi_base == 0) && pcat_compat) { + /* + * Map the legacy ISA devices into the IOSAPIC data. Some of + * these may get reprogrammed later on with data from the ACPI + * Interrupt Source Override table. + */ + for (isa_irq = 0; isa_irq < 16; ++isa_irq) + iosapic_override_isa_irq(isa_irq, isa_irq, + IOSAPIC_POL_HIGH, + IOSAPIC_EDGE); + } + return 0; +} + +int iosapic_remove(unsigned int gsi_base) +{ + int i, irq, index, err = 0; + unsigned long flags; + + spin_lock_irqsave(&iosapic_lock, flags); + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI base %u\n", + __func__, gsi_base); + goto out; + } + + if (iosapic_lists[index].rtes_inuse) { + err = -EBUSY; + printk(KERN_WARNING "%s: IOSAPIC for GSI base %u is busy\n", + __func__, gsi_base); + goto out; + } + + for (i = gsi_base; i < gsi_base + iosapic_lists[index].num_rte; i++) { + irq = __gsi_to_irq(i); + if (irq < 0) + continue; + + err = iosapic_delete_rte(irq, i); + if (err) + goto out; + } + + iounmap(iosapic_lists[index].addr); + iosapic_free(index); + out: + spin_unlock_irqrestore(&iosapic_lock, flags); + return err; +} + +#ifdef CONFIG_NUMA +void map_iosapic_to_node(unsigned int gsi_base, int node) +{ + int index; + + index = find_iosapic(gsi_base); + if (index < 0) { + printk(KERN_WARNING "%s: No IOSAPIC for GSI %u\n", + __func__, gsi_base); + return; + } + iosapic_lists[index].node = node; + return; +} +#endif diff --git a/arch/ia64/kernel/irq.c b/arch/ia64/kernel/irq.c new file mode 100644 index 000000000000..275b9ea58c64 --- /dev/null +++ b/arch/ia64/kernel/irq.c @@ -0,0 +1,181 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/ia64/kernel/irq.c + * + * Copyright (C) 1992, 1998 Linus Torvalds, Ingo Molnar + * + * This file contains the code used by various IRQ handling routines: + * asking for different IRQs should be done through these routines + * instead of just grabbing them. Thus setups with different IRQ numbers + * shouldn't result in any weird surprises, and installing new handlers + * should be easier. + * + * Copyright (C) Ashok Raj, Intel Corporation 2004 + * + * 4/14/2004: Added code to handle cpu migration and do safe irq + * migration without losing interrupts for iosapic + * architecture. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include + +/* + * 'what should we do if we get a hw irq event on an illegal vector'. + * each architecture has to answer this themselves. + */ +void ack_bad_irq(unsigned int irq) +{ + printk(KERN_ERR "Unexpected irq vector 0x%x on CPU %u!\n", irq, smp_processor_id()); +} + +/* + * Interrupt statistics: + */ + +atomic_t irq_err_count; + +/* + * /proc/interrupts printing: + */ +int arch_show_interrupts(struct seq_file *p, int prec) +{ + seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count)); + return 0; +} + +#ifdef CONFIG_SMP +static char irq_redir [NR_IRQS]; // = { [0 ... NR_IRQS-1] = 1 }; + +void set_irq_affinity_info (unsigned int irq, int hwid, int redir) +{ + if (irq < NR_IRQS) { + irq_data_update_affinity(irq_get_irq_data(irq), + cpumask_of(cpu_logical_id(hwid))); + irq_redir[irq] = (char) (redir & 0xff); + } +} +#endif /* CONFIG_SMP */ + +int __init arch_early_irq_init(void) +{ + ia64_mca_irq_init(); + return 0; +} + +#ifdef CONFIG_HOTPLUG_CPU +unsigned int vectors_in_migration[NR_IRQS]; + +/* + * Since cpu_online_mask is already updated, we just need to check for + * affinity that has zeros + */ +static void migrate_irqs(void) +{ + int irq, new_cpu; + + for (irq=0; irq < NR_IRQS; irq++) { + struct irq_desc *desc = irq_to_desc(irq); + struct irq_data *data = irq_desc_get_irq_data(desc); + struct irq_chip *chip = irq_data_get_irq_chip(data); + + if (irqd_irq_disabled(data)) + continue; + + /* + * No handling for now. + * TBD: Implement a disable function so we can now + * tell CPU not to respond to these local intr sources. + * such as ITV,CPEI,MCA etc. + */ + if (irqd_is_per_cpu(data)) + continue; + + if (cpumask_any_and(irq_data_get_affinity_mask(data), + cpu_online_mask) >= nr_cpu_ids) { + /* + * Save it for phase 2 processing + */ + vectors_in_migration[irq] = irq; + + new_cpu = cpumask_any(cpu_online_mask); + + /* + * Al three are essential, currently WARN_ON.. maybe panic? + */ + if (chip && chip->irq_disable && + chip->irq_enable && chip->irq_set_affinity) { + chip->irq_disable(data); + chip->irq_set_affinity(data, + cpumask_of(new_cpu), false); + chip->irq_enable(data); + } else { + WARN_ON((!chip || !chip->irq_disable || + !chip->irq_enable || + !chip->irq_set_affinity)); + } + } + } +} + +void fixup_irqs(void) +{ + unsigned int irq; + extern void ia64_process_pending_intr(void); + extern volatile int time_keeper_id; + + /* Mask ITV to disable timer */ + ia64_set_itv(1 << 16); + + /* + * Find a new timesync master + */ + if (smp_processor_id() == time_keeper_id) { + time_keeper_id = cpumask_first(cpu_online_mask); + printk ("CPU %d is now promoted to time-keeper master\n", time_keeper_id); + } + + /* + * Phase 1: Locate IRQs bound to this cpu and + * relocate them for cpu removal. + */ + migrate_irqs(); + + /* + * Phase 2: Perform interrupt processing for all entries reported in + * local APIC. + */ + ia64_process_pending_intr(); + + /* + * Phase 3: Now handle any interrupts not captured in local APIC. + * This is to account for cases that device interrupted during the time the + * rte was being disabled and re-programmed. + */ + for (irq=0; irq < NR_IRQS; irq++) { + if (vectors_in_migration[irq]) { + struct pt_regs *old_regs = set_irq_regs(NULL); + + vectors_in_migration[irq]=0; + generic_handle_irq(irq); + set_irq_regs(old_regs); + } + } + + /* + * Now let processor die. We do irq disable and max_xtp() to + * ensure there is no more interrupts routed to this processor. + * But the local timer interrupt can have 1 pending which we + * take care in timer_interrupt(). + */ + max_xtp(); + local_irq_disable(); +} +#endif diff --git a/arch/ia64/kernel/irq.h b/arch/ia64/kernel/irq.h new file mode 100644 index 000000000000..4d16f3cbeb1d --- /dev/null +++ b/arch/ia64/kernel/irq.h @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +extern void register_percpu_irq(ia64_vector vec, irq_handler_t handler, + unsigned long flags, const char *name); diff --git a/arch/ia64/kernel/irq_ia64.c b/arch/ia64/kernel/irq_ia64.c new file mode 100644 index 000000000000..46e33c5cb53d --- /dev/null +++ b/arch/ia64/kernel/irq_ia64.c @@ -0,0 +1,645 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * linux/arch/ia64/kernel/irq_ia64.c + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + * + * 6/10/99: Updated to bring in sync with x86 version to facilitate + * support for SMP and different interrupt controllers. + * + * 09/15/00 Goutham Rao Implemented pci_irq_to_vector + * PCI to vector allocation routine. + * 04/14/2004 Ashok Raj + * Added CPU Hotplug handling for IPF. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#define IRQ_DEBUG 0 + +#define IRQ_VECTOR_UNASSIGNED (0) + +#define IRQ_UNUSED (0) +#define IRQ_USED (1) +#define IRQ_RSVD (2) + +int ia64_first_device_vector = IA64_DEF_FIRST_DEVICE_VECTOR; +int ia64_last_device_vector = IA64_DEF_LAST_DEVICE_VECTOR; + +/* default base addr of IPI table */ +void __iomem *ipi_base_addr = ((void __iomem *) + (__IA64_UNCACHED_OFFSET | IA64_IPI_DEFAULT_BASE_ADDR)); + +static cpumask_t vector_allocation_domain(int cpu); + +/* + * Legacy IRQ to IA-64 vector translation table. + */ +__u8 isa_irq_to_vector_map[16] = { + /* 8259 IRQ translation, first 16 entries */ + 0x2f, 0x20, 0x2e, 0x2d, 0x2c, 0x2b, 0x2a, 0x29, + 0x28, 0x27, 0x26, 0x25, 0x24, 0x23, 0x22, 0x21 +}; +EXPORT_SYMBOL(isa_irq_to_vector_map); + +DEFINE_SPINLOCK(vector_lock); + +struct irq_cfg irq_cfg[NR_IRQS] __read_mostly = { + [0 ... NR_IRQS - 1] = { + .vector = IRQ_VECTOR_UNASSIGNED, + .domain = CPU_MASK_NONE + } +}; + +DEFINE_PER_CPU(int[IA64_NUM_VECTORS], vector_irq) = { + [0 ... IA64_NUM_VECTORS - 1] = -1 +}; + +static cpumask_t vector_table[IA64_NUM_VECTORS] = { + [0 ... IA64_NUM_VECTORS - 1] = CPU_MASK_NONE +}; + +static int irq_status[NR_IRQS] = { + [0 ... NR_IRQS -1] = IRQ_UNUSED +}; + +static inline int find_unassigned_irq(void) +{ + int irq; + + for (irq = IA64_FIRST_DEVICE_VECTOR; irq < NR_IRQS; irq++) + if (irq_status[irq] == IRQ_UNUSED) + return irq; + return -ENOSPC; +} + +static inline int find_unassigned_vector(cpumask_t domain) +{ + cpumask_t mask; + int pos, vector; + + cpumask_and(&mask, &domain, cpu_online_mask); + if (cpumask_empty(&mask)) + return -EINVAL; + + for (pos = 0; pos < IA64_NUM_DEVICE_VECTORS; pos++) { + vector = IA64_FIRST_DEVICE_VECTOR + pos; + cpumask_and(&mask, &domain, &vector_table[vector]); + if (!cpumask_empty(&mask)) + continue; + return vector; + } + return -ENOSPC; +} + +static int __bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + cpumask_t mask; + int cpu; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON((unsigned)vector >= IA64_NUM_VECTORS); + + cpumask_and(&mask, &domain, cpu_online_mask); + if (cpumask_empty(&mask)) + return -EINVAL; + if ((cfg->vector == vector) && cpumask_equal(&cfg->domain, &domain)) + return 0; + if (cfg->vector != IRQ_VECTOR_UNASSIGNED) + return -EBUSY; + for_each_cpu(cpu, &mask) + per_cpu(vector_irq, cpu)[vector] = irq; + cfg->vector = vector; + cfg->domain = domain; + irq_status[irq] = IRQ_USED; + cpumask_or(&vector_table[vector], &vector_table[vector], &domain); + return 0; +} + +int bind_irq_vector(int irq, int vector, cpumask_t domain) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __bind_irq_vector(irq, vector, domain); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +static void __clear_irq_vector(int irq) +{ + int vector, cpu; + cpumask_t domain; + struct irq_cfg *cfg = &irq_cfg[irq]; + + BUG_ON((unsigned)irq >= NR_IRQS); + BUG_ON(cfg->vector == IRQ_VECTOR_UNASSIGNED); + vector = cfg->vector; + domain = cfg->domain; + for_each_cpu_and(cpu, &cfg->domain, cpu_online_mask) + per_cpu(vector_irq, cpu)[vector] = -1; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + irq_status[irq] = IRQ_UNUSED; + cpumask_andnot(&vector_table[vector], &vector_table[vector], &domain); +} + +static void clear_irq_vector(int irq) +{ + unsigned long flags; + + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + spin_unlock_irqrestore(&vector_lock, flags); +} + +int +ia64_native_assign_irq_vector (int irq) +{ + unsigned long flags; + int vector, cpu; + cpumask_t domain = CPU_MASK_NONE; + + vector = -ENOSPC; + + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + if (irq == AUTO_ASSIGN) + irq = vector; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); + return vector; +} + +void +ia64_native_free_irq_vector (int vector) +{ + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) + return; + clear_irq_vector(vector); +} + +int +reserve_irq_vector (int vector) +{ + if (vector < IA64_FIRST_DEVICE_VECTOR || + vector > IA64_LAST_DEVICE_VECTOR) + return -EINVAL; + return !!bind_irq_vector(vector, vector, CPU_MASK_ALL); +} + +/* + * Initialize vector_irq on a new cpu. This function must be called + * with vector_lock held. + */ +void __setup_vector_irq(int cpu) +{ + int irq, vector; + + /* Clear vector_irq */ + for (vector = 0; vector < IA64_NUM_VECTORS; ++vector) + per_cpu(vector_irq, cpu)[vector] = -1; + /* Mark the inuse vectors */ + for (irq = 0; irq < NR_IRQS; ++irq) { + if (!cpumask_test_cpu(cpu, &irq_cfg[irq].domain)) + continue; + vector = irq_to_vector(irq); + per_cpu(vector_irq, cpu)[vector] = irq; + } +} + +#ifdef CONFIG_SMP + +static enum vector_domain_type { + VECTOR_DOMAIN_NONE, + VECTOR_DOMAIN_PERCPU +} vector_domain_type = VECTOR_DOMAIN_NONE; + +static cpumask_t vector_allocation_domain(int cpu) +{ + if (vector_domain_type == VECTOR_DOMAIN_PERCPU) + return *cpumask_of(cpu); + return CPU_MASK_ALL; +} + +static int __irq_prepare_move(int irq, int cpu) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + int vector; + cpumask_t domain; + + if (cfg->move_in_progress || cfg->move_cleanup_count) + return -EBUSY; + if (cfg->vector == IRQ_VECTOR_UNASSIGNED || !cpu_online(cpu)) + return -EINVAL; + if (cpumask_test_cpu(cpu, &cfg->domain)) + return 0; + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector < 0) + return -ENOSPC; + cfg->move_in_progress = 1; + cfg->old_domain = cfg->domain; + cfg->vector = IRQ_VECTOR_UNASSIGNED; + cfg->domain = CPU_MASK_NONE; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + return 0; +} + +int irq_prepare_move(int irq, int cpu) +{ + unsigned long flags; + int ret; + + spin_lock_irqsave(&vector_lock, flags); + ret = __irq_prepare_move(irq, cpu); + spin_unlock_irqrestore(&vector_lock, flags); + return ret; +} + +void irq_complete_move(unsigned irq) +{ + struct irq_cfg *cfg = &irq_cfg[irq]; + cpumask_t cleanup_mask; + int i; + + if (likely(!cfg->move_in_progress)) + return; + + if (unlikely(cpumask_test_cpu(smp_processor_id(), &cfg->old_domain))) + return; + + cpumask_and(&cleanup_mask, &cfg->old_domain, cpu_online_mask); + cfg->move_cleanup_count = cpumask_weight(&cleanup_mask); + for_each_cpu(i, &cleanup_mask) + ia64_send_ipi(i, IA64_IRQ_MOVE_VECTOR, IA64_IPI_DM_INT, 0); + cfg->move_in_progress = 0; +} + +static irqreturn_t smp_irq_move_cleanup_interrupt(int irq, void *dev_id) +{ + int me = smp_processor_id(); + ia64_vector vector; + unsigned long flags; + + for (vector = IA64_FIRST_DEVICE_VECTOR; + vector < IA64_LAST_DEVICE_VECTOR; vector++) { + int irq; + struct irq_desc *desc; + struct irq_cfg *cfg; + irq = __this_cpu_read(vector_irq[vector]); + if (irq < 0) + continue; + + desc = irq_to_desc(irq); + cfg = irq_cfg + irq; + raw_spin_lock(&desc->lock); + if (!cfg->move_cleanup_count) + goto unlock; + + if (!cpumask_test_cpu(me, &cfg->old_domain)) + goto unlock; + + spin_lock_irqsave(&vector_lock, flags); + __this_cpu_write(vector_irq[vector], -1); + cpumask_clear_cpu(me, &vector_table[vector]); + spin_unlock_irqrestore(&vector_lock, flags); + cfg->move_cleanup_count--; + unlock: + raw_spin_unlock(&desc->lock); + } + return IRQ_HANDLED; +} + +static int __init parse_vector_domain(char *arg) +{ + if (!arg) + return -EINVAL; + if (!strcmp(arg, "percpu")) { + vector_domain_type = VECTOR_DOMAIN_PERCPU; + no_int_routing = 1; + } + return 0; +} +early_param("vector", parse_vector_domain); +#else +static cpumask_t vector_allocation_domain(int cpu) +{ + return CPU_MASK_ALL; +} +#endif + + +void destroy_and_reserve_irq(unsigned int irq) +{ + unsigned long flags; + + irq_init_desc(irq); + spin_lock_irqsave(&vector_lock, flags); + __clear_irq_vector(irq); + irq_status[irq] = IRQ_RSVD; + spin_unlock_irqrestore(&vector_lock, flags); +} + +/* + * Dynamic irq allocate and deallocation for MSI + */ +int create_irq(void) +{ + unsigned long flags; + int irq, vector, cpu; + cpumask_t domain = CPU_MASK_NONE; + + irq = vector = -ENOSPC; + spin_lock_irqsave(&vector_lock, flags); + for_each_online_cpu(cpu) { + domain = vector_allocation_domain(cpu); + vector = find_unassigned_vector(domain); + if (vector >= 0) + break; + } + if (vector < 0) + goto out; + irq = find_unassigned_irq(); + if (irq < 0) + goto out; + BUG_ON(__bind_irq_vector(irq, vector, domain)); + out: + spin_unlock_irqrestore(&vector_lock, flags); + if (irq >= 0) + irq_init_desc(irq); + return irq; +} + +void destroy_irq(unsigned int irq) +{ + irq_init_desc(irq); + clear_irq_vector(irq); +} + +#ifdef CONFIG_SMP +# define IS_RESCHEDULE(vec) (vec == IA64_IPI_RESCHEDULE) +# define IS_LOCAL_TLB_FLUSH(vec) (vec == IA64_IPI_LOCAL_TLB_FLUSH) +#else +# define IS_RESCHEDULE(vec) (0) +# define IS_LOCAL_TLB_FLUSH(vec) (0) +#endif +/* + * That's where the IVT branches when we get an external + * interrupt. This branches to the correct hardware IRQ handler via + * function ptr. + */ +void +ia64_handle_irq (ia64_vector vector, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned long saved_tpr; + +#if IRQ_DEBUG + { + unsigned long bsp, sp; + + /* + * Note: if the interrupt happened while executing in + * the context switch routine (ia64_switch_to), we may + * get a spurious stack overflow here. This is + * because the register and the memory stack are not + * switched atomically. + */ + bsp = ia64_getreg(_IA64_REG_AR_BSP); + sp = ia64_getreg(_IA64_REG_SP); + + if ((sp - bsp) < 1024) { + static DEFINE_RATELIMIT_STATE(ratelimit, 5 * HZ, 5); + + if (__ratelimit(&ratelimit)) { + printk("ia64_handle_irq: DANGER: less than " + "1KB of free stack space!!\n" + "(bsp=0x%lx, sp=%lx)\n", bsp, sp); + } + } + } +#endif /* IRQ_DEBUG */ + + /* + * Always set TPR to limit maximum interrupt nesting depth to + * 16 (without this, it would be ~240, which could easily lead + * to kernel stack overflows). + */ + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + while (vector != IA64_SPURIOUS_INT_VECTOR) { + int irq = local_vector_to_irq(vector); + + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); + kstat_incr_irq_this_cpu(irq); + } else if (unlikely(IS_RESCHEDULE(vector))) { + scheduler_ipi(); + kstat_incr_irq_this_cpu(irq); + } else { + ia64_setreg(_IA64_REG_CR_TPR, vector); + ia64_srlz_d(); + + if (unlikely(irq < 0)) { + printk(KERN_ERR "%s: Unexpected interrupt " + "vector %d on CPU %d is not mapped " + "to any IRQ!\n", __func__, vector, + smp_processor_id()); + } else + generic_handle_irq(irq); + + /* + * Disable interrupts and send EOI: + */ + local_irq_disable(); + ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); + } + ia64_eoi(); + vector = ia64_get_ivr(); + } + /* + * This must be done *after* the ia64_eoi(). For example, the keyboard softirq + * handler needs to be able to wait for further keyboard interrupts, which can't + * come through until ia64_eoi() has been done. + */ + irq_exit(); + set_irq_regs(old_regs); +} + +#ifdef CONFIG_HOTPLUG_CPU +/* + * This function emulates a interrupt processing when a cpu is about to be + * brought down. + */ +void ia64_process_pending_intr(void) +{ + ia64_vector vector; + unsigned long saved_tpr; + extern unsigned int vectors_in_migration[NR_IRQS]; + + vector = ia64_get_ivr(); + + irq_enter(); + saved_tpr = ia64_getreg(_IA64_REG_CR_TPR); + ia64_srlz_d(); + + /* + * Perform normal interrupt style processing + */ + while (vector != IA64_SPURIOUS_INT_VECTOR) { + int irq = local_vector_to_irq(vector); + + if (unlikely(IS_LOCAL_TLB_FLUSH(vector))) { + smp_local_flush_tlb(); + kstat_incr_irq_this_cpu(irq); + } else if (unlikely(IS_RESCHEDULE(vector))) { + kstat_incr_irq_this_cpu(irq); + } else { + struct pt_regs *old_regs = set_irq_regs(NULL); + + ia64_setreg(_IA64_REG_CR_TPR, vector); + ia64_srlz_d(); + + /* + * Now try calling normal ia64_handle_irq as it would have got called + * from a real intr handler. Try passing null for pt_regs, hopefully + * it will work. I hope it works!. + * Probably could shared code. + */ + if (unlikely(irq < 0)) { + printk(KERN_ERR "%s: Unexpected interrupt " + "vector %d on CPU %d not being mapped " + "to any IRQ!!\n", __func__, vector, + smp_processor_id()); + } else { + vectors_in_migration[irq]=0; + generic_handle_irq(irq); + } + set_irq_regs(old_regs); + + /* + * Disable interrupts and send EOI + */ + local_irq_disable(); + ia64_setreg(_IA64_REG_CR_TPR, saved_tpr); + } + ia64_eoi(); + vector = ia64_get_ivr(); + } + irq_exit(); +} +#endif + + +#ifdef CONFIG_SMP + +static irqreturn_t dummy_handler (int irq, void *dev_id) +{ + BUG(); + return IRQ_NONE; +} + +/* + * KVM uses this interrupt to force a cpu out of guest mode + */ + +#endif + +void +register_percpu_irq(ia64_vector vec, irq_handler_t handler, unsigned long flags, + const char *name) +{ + unsigned int irq; + + irq = vec; + BUG_ON(bind_irq_vector(irq, vec, CPU_MASK_ALL)); + irq_set_status_flags(irq, IRQ_PER_CPU); + irq_set_chip(irq, &irq_type_ia64_lsapic); + if (handler) + if (request_irq(irq, handler, flags, name, NULL)) + pr_err("Failed to request irq %u (%s)\n", irq, name); + irq_set_handler(irq, handle_percpu_irq); +} + +void __init +ia64_native_register_ipi(void) +{ +#ifdef CONFIG_SMP + register_percpu_irq(IA64_IPI_VECTOR, handle_IPI, 0, "IPI"); + register_percpu_irq(IA64_IPI_RESCHEDULE, dummy_handler, 0, "resched"); + register_percpu_irq(IA64_IPI_LOCAL_TLB_FLUSH, dummy_handler, 0, + "tlb_flush"); +#endif +} + +void __init +init_IRQ (void) +{ + acpi_boot_init(); + ia64_register_ipi(); + register_percpu_irq(IA64_SPURIOUS_INT_VECTOR, NULL, 0, NULL); +#ifdef CONFIG_SMP + if (vector_domain_type != VECTOR_DOMAIN_NONE) { + register_percpu_irq(IA64_IRQ_MOVE_VECTOR, + smp_irq_move_cleanup_interrupt, 0, + "irq_move"); + } +#endif +} + +void +ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect) +{ + void __iomem *ipi_addr; + unsigned long ipi_data; + unsigned long phys_cpu_id; + + phys_cpu_id = cpu_physical_id(cpu); + + /* + * cpu number is in 8bit ID and 8bit EID + */ + + ipi_data = (delivery_mode << 8) | (vector & 0xff); + ipi_addr = ipi_base_addr + ((phys_cpu_id << 4) | ((redirect & 1) << 3)); + + writeq(ipi_data, ipi_addr); +} diff --git a/arch/ia64/kernel/irq_lsapic.c b/arch/ia64/kernel/irq_lsapic.c new file mode 100644 index 000000000000..23bf4499a75d --- /dev/null +++ b/arch/ia64/kernel/irq_lsapic.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * LSAPIC Interrupt Controller + * + * This takes care of interrupts that are generated by the CPU's + * internal Streamlined Advanced Programmable Interrupt Controller + * (LSAPIC), such as the ITC and IPI interrupts. + * + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang + */ + +#include +#include + +static unsigned int +lsapic_noop_startup (struct irq_data *data) +{ + return 0; +} + +static void +lsapic_noop (struct irq_data *data) +{ + /* nothing to do... */ +} + +static int lsapic_retrigger(struct irq_data *data) +{ + ia64_resend_irq(data->irq); + + return 1; +} + +struct irq_chip irq_type_ia64_lsapic = { + .name = "LSAPIC", + .irq_startup = lsapic_noop_startup, + .irq_shutdown = lsapic_noop, + .irq_enable = lsapic_noop, + .irq_disable = lsapic_noop, + .irq_ack = lsapic_noop, + .irq_retrigger = lsapic_retrigger, +}; diff --git a/arch/ia64/kernel/ivt.S b/arch/ia64/kernel/ivt.S new file mode 100644 index 000000000000..da90c49df628 --- /dev/null +++ b/arch/ia64/kernel/ivt.S @@ -0,0 +1,1688 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * arch/ia64/kernel/ivt.S + * + * Copyright (C) 1998-2001, 2003, 2005 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger + * Copyright (C) 2000, 2002-2003 Intel Co + * Asit Mallick + * Suresh Siddha + * Kenneth Chen + * Fenghua Yu + * + * 00/08/23 Asit Mallick TLB handling for SMP + * 00/12/20 David Mosberger-Tang DTLB/ITLB handler now uses virtual PT. + * + * Copyright (C) 2005 Hewlett-Packard Co + * Dan Magenheimer + * Xen paravirtualization + * Copyright (c) 2008 Isaku Yamahata + * VA Linux Systems Japan K.K. + * pv_ops. + * Yaozu (Eddie) Dong + */ +/* + * This file defines the interruption vector table used by the CPU. + * It does not include one entry per possible cause of interruption. + * + * The first 20 entries of the table contain 64 bundles each while the + * remaining 48 entries contain only 16 bundles each. + * + * The 64 bundles are used to allow inlining the whole handler for critical + * interruptions like TLB misses. + * + * For each entry, the comment is as follows: + * + * // 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) + * entry offset ----/ / / / / + * entry number ---------/ / / / + * size of the entry -------------/ / / + * vector name -------------------------------------/ / + * interruptions triggering this vector ----------------------/ + * + * The table is 32KB in size and must be aligned on 32KB boundary. + * (The CPU ignores the 15 lower bits of the address) + * + * Table is based upon EAS2.6 (Oct 1999) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if 0 +# define PSR_DEFAULT_BITS psr.ac +#else +# define PSR_DEFAULT_BITS 0 +#endif + +#if 0 + /* + * This lets you track the last eight faults that occurred on the CPU. Make sure ar.k2 isn't + * needed for something else before enabling this... + */ +# define DBG_FAULT(i) mov r16=ar.k2;; shl r16=r16,8;; add r16=(i),r16;;mov ar.k2=r16 +#else +# define DBG_FAULT(i) +#endif + +#include "minstate.h" + +#define FAULT(n) \ + mov r31=pr; \ + mov r19=n;; /* prepare to save predicates */ \ + br.sptk.many dispatch_to_fault_handler + + .section .text..ivt,"ax" + + .align 32768 // align on 32KB boundary + .global ia64_ivt + EXPORT_SYMBOL(ia64_ivt) +ia64_ivt: +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0000 Entry 0 (size 64 bundles) VHPT Translation (8,20,47) +ENTRY(vhpt_miss) + DBG_FAULT(0) + /* + * The VHPT vector is invoked when the TLB entry for the virtual page table + * is missing. This happens only as a result of a previous + * (the "original") TLB miss, which may either be caused by an instruction + * fetch or a data access (or non-access). + * + * What we do here is normal TLB miss handing for the _original_ miss, + * followed by inserting the TLB entry for the virtual page table page + * that the VHPT walker was attempting to access. The latter gets + * inserted as long as page table entry above pte level have valid + * mappings for the faulting address. The TLB entry for the original + * miss gets inserted only if the pte entry indicates that the page is + * present. + * + * do_page_fault gets invoked in the following cases: + * - the faulting virtual address uses unimplemented address bits + * - the faulting virtual address has no valid page table mapping + */ + MOV_FROM_IFA(r16) // get address that caused the TLB miss +#ifdef CONFIG_HUGETLB_PAGE + movl r18=PAGE_SHIFT + MOV_FROM_ITIR(r25) +#endif + ;; + RSM_PSR_DT // use physical addressing for data + mov r31=pr // save the predicate registers + mov r19=IA64_KR(PT_BASE) // get page table base address + shl r21=r16,3 // shift bit 60 into sign bit + shr.u r17=r16,61 // get the region number into r17 + ;; + shr.u r22=r21,3 +#ifdef CONFIG_HUGETLB_PAGE + extr.u r26=r25,2,6 + ;; + cmp.ne p8,p0=r18,r26 + sub r27=r26,r18 + ;; +(p8) dep r25=r18,r25,2,6 +(p8) shr r22=r22,r27 +#endif + ;; + cmp.eq p6,p7=5,r17 // is IFA pointing into to region 5? + shr.u r18=r22,PGDIR_SHIFT // get bottom portion of pgd index bit + ;; +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? +#if CONFIG_PGTABLE_LEVELS == 4 + shr.u r28=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? +#if CONFIG_PGTABLE_LEVELS == 4 + dep r28=r28,r17,3,(PAGE_SHIFT-3) // r28=pud_offset(pgd,addr) + ;; + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +(p7) ld8 r29=[r28] // get *pud (may be 0) + ;; +(p7) cmp.eq.or.andcm p6,p7=r29,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r29,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) +#else + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pgd,addr) +#endif + ;; +(p7) ld8 r20=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r20,r0 // was pmd_present(*pmd) == NULL? + dep r21=r19,r20,3,(PAGE_SHIFT-3) // r21=pte_offset(pmd,addr) + ;; +(p7) ld8 r18=[r21] // read *pte + MOV_FROM_ISR(r19) // cr.isr bit 32 tells us if this is an insn miss + ;; +(p7) tbit.z p6,p7=r18,_PAGE_P_BIT // page present bit cleared? + MOV_FROM_IHA(r22) // get the VHPT address that caused the TLB miss + ;; // avoid RAW on p7 +(p7) tbit.nz.unc p10,p11=r19,32 // is it an instruction TLB miss? + dep r23=0,r20,0,PAGE_SHIFT // clear low bits to get page address + ;; + ITC_I_AND_D(p10, p11, r18, r24) // insert the instruction TLB entry and + // insert the data TLB entry +(p6) br.cond.spnt.many page_fault // handle bad address/page not present (page fault) + MOV_TO_IFA(r22, r24) + +#ifdef CONFIG_HUGETLB_PAGE + MOV_TO_ITIR(p8, r25, r24) // change to default page-size for VHPT +#endif + + /* + * Now compute and insert the TLB entry for the virtual page table. We never + * execute in a page table page so there is no need to set the exception deferral + * bit. + */ + adds r24=__DIRTY_BITS_NO_ED|_PAGE_PL_0|_PAGE_AR_RW,r23 + ;; + ITC_D(p7, r24, r25) + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + /* + * Re-check pagetable entry. If they changed, we may have received a ptc.g + * between reading the pagetable and the "itc". If so, flush the entry we + * inserted and retry. At this point, we have: + * + * r28 = equivalent of pud_offset(pgd, ifa) + * r17 = equivalent of pmd_offset(pud, ifa) + * r21 = equivalent of pte_offset(pmd, ifa) + * + * r29 = *pud + * r20 = *pmd + * r18 = *pte + */ + ld8 r25=[r21] // read *pte again + ld8 r26=[r17] // read *pmd again +#if CONFIG_PGTABLE_LEVELS == 4 + ld8 r19=[r28] // read *pud again +#endif + cmp.ne p6,p7=r0,r0 + ;; + cmp.ne.or.andcm p6,p7=r26,r20 // did *pmd change +#if CONFIG_PGTABLE_LEVELS == 4 + cmp.ne.or.andcm p6,p7=r19,r29 // did *pud change +#endif + mov r27=PAGE_SHIFT<<2 + ;; +(p6) ptc.l r22,r27 // purge PTE page translation +(p7) cmp.ne.or.andcm p6,p7=r25,r18 // did *pte change + ;; +(p6) ptc.l r16,r27 // purge translation +#endif + + mov pr=r31,-1 // restore predicate registers + RFI +END(vhpt_miss) + + .org ia64_ivt+0x400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0400 Entry 1 (size 64 bundles) ITLB (21) +ENTRY(itlb_miss) + DBG_FAULT(1) + /* + * The ITLB handler accesses the PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. + */ + MOV_FROM_IFA(r16) // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +.itlb_fault: + MOV_FROM_IHA(r17) // get virtual address of PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read *pte + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt page_fault + ;; + ITC_I(p0, r18, r19) + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read *pte again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + RFI +END(itlb_miss) + + .org ia64_ivt+0x0800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0800 Entry 2 (size 64 bundles) DTLB (9,48) +ENTRY(dtlb_miss) + DBG_FAULT(2) + /* + * The DTLB handler accesses the PTE via the virtually mapped linear + * page table. If a nested TLB miss occurs, we switch into physical + * mode, walk the page table, and then re-execute the PTE read and + * go on normally after that. + */ + MOV_FROM_IFA(r16) // get virtual address + mov r29=b0 // save b0 + mov r31=pr // save predicates +dtlb_fault: + MOV_FROM_IHA(r17) // get virtual address of PTE + movl r30=1f // load nested fault continuation point + ;; +1: ld8 r18=[r17] // read *pte + ;; + mov b0=r29 + tbit.z p6,p0=r18,_PAGE_P_BIT // page present bit cleared? +(p6) br.cond.spnt page_fault + ;; + ITC_D(p0, r18, r19) + ;; +#ifdef CONFIG_SMP + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r19=[r17] // read *pte again and see if same + mov r20=PAGE_SHIFT<<2 // setup page size for purge + ;; + cmp.ne p7,p0=r18,r19 + ;; +(p7) ptc.l r16,r20 +#endif + mov pr=r31,-1 + RFI +END(dtlb_miss) + + .org ia64_ivt+0x0c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x0c00 Entry 3 (size 64 bundles) Alt ITLB (19) +ENTRY(alt_itlb_miss) + DBG_FAULT(3) + MOV_FROM_IFA(r16) // get address that caused the TLB miss + movl r17=PAGE_KERNEL + MOV_FROM_IPSR(p0, r21) + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + mov r31=pr + ;; +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 + ;; + cmp.gt p8,p0=6,r22 // user mode + ;; + THASH(p8, r17, r16, r23) + ;; + MOV_TO_IHA(p8, r17, r23) +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk .itlb_fault +#endif + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r19=r19,r16 // clear ed, reserved bits, and PTE control bits + shr.u r18=r16,57 // move address bit 61 to bit 4 + ;; + andcm r18=0x10,r18 // bit 4=~address-bit(61) + cmp.ne p8,p0=r0,r23 // psr.cpl != 0? + or r19=r17,r19 // insert PTE control bits into r19 + ;; + or r19=r19,r18 // set bit 4 (uncached) if the access was to region 6 +(p8) br.cond.spnt page_fault + ;; + ITC_I(p0, r19, r18) // insert the TLB entry + mov pr=r31,-1 + RFI +END(alt_itlb_miss) + + .org ia64_ivt+0x1000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46) +ENTRY(alt_dtlb_miss) + DBG_FAULT(4) + MOV_FROM_IFA(r16) // get address that caused the TLB miss + movl r17=PAGE_KERNEL + MOV_FROM_ISR(r20) + movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff) + MOV_FROM_IPSR(p0, r21) + mov r31=pr + mov r24=PERCPU_ADDR + ;; +#ifdef CONFIG_DISABLE_VHPT + shr.u r22=r16,61 // get the region number into r21 + ;; + cmp.gt p8,p0=6,r22 // access to region 0-5 + ;; + THASH(p8, r17, r16, r25) + ;; + MOV_TO_IHA(p8, r17, r25) +(p8) mov r29=b0 // save b0 +(p8) br.cond.dptk dtlb_fault +#endif + cmp.ge p10,p11=r16,r24 // access to per_cpu_data? + tbit.z p12,p0=r16,61 // access to region 6? + mov r25=PERCPU_PAGE_SHIFT << 2 + mov r26=PERCPU_PAGE_SIZE + nop.m 0 + nop.b 0 + ;; +(p10) mov r19=IA64_KR(PER_CPU_DATA) +(p11) and r19=r19,r16 // clear non-ppn fields + extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl + and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field + tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on? + tbit.nz p9,p0=r20,IA64_ISR_NA_BIT // is non-access bit on? + ;; +(p10) sub r19=r19,r26 + MOV_TO_ITIR(p10, r25, r24) + cmp.ne p8,p0=r0,r23 +(p9) cmp.eq.or.andcm p6,p7=IA64_ISR_CODE_LFETCH,r22 // check isr.code field +(p12) dep r17=-1,r17,4,1 // set ma=UC for region 6 addr +(p8) br.cond.spnt page_fault + + dep r21=-1,r21,IA64_PSR_ED_BIT,1 + ;; + or r19=r19,r17 // insert PTE control bits into r19 + MOV_TO_IPSR(p6, r21, r24) + ;; + ITC_D(p7, r19, r18) // insert the TLB entry + mov pr=r31,-1 + RFI +END(alt_dtlb_miss) + + .org ia64_ivt+0x1400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1400 Entry 5 (size 64 bundles) Data nested TLB (6,45) +ENTRY(nested_dtlb_miss) + /* + * In the absence of kernel bugs, we get here when the virtually mapped linear + * page table is accessed non-speculatively (e.g., in the Dirty-bit, Instruction + * Access-bit, or Data Access-bit faults). If the DTLB entry for the virtual page + * table is missing, a nested TLB miss fault is triggered and control is + * transferred to this point. When this happens, we lookup the pte for the + * faulting address by walking the page table in physical mode and return to the + * continuation point passed in register r30 (or call page_fault if the address is + * not mapped). + * + * Input: r16: faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Output: r17: physical address of PTE of faulting address + * r29: saved b0 + * r30: continuation address + * r31: saved pr + * + * Clobbered: b0, r18, r19, r21, r22, psr.dt (cleared) + */ + RSM_PSR_DT // switch to using physical data addressing + mov r19=IA64_KR(PT_BASE) // get the page table base address + shl r21=r16,3 // shift bit 60 into sign bit + MOV_FROM_ITIR(r18) + ;; + shr.u r17=r16,61 // get the region number into r17 + extr.u r18=r18,2,6 // get the faulting page size + ;; + cmp.eq p6,p7=5,r17 // is faulting address in region 5? + add r22=-PAGE_SHIFT,r18 // adjustment for hugetlb address + add r18=PGDIR_SHIFT-PAGE_SHIFT,r18 + ;; + shr.u r22=r16,r22 + shr.u r18=r16,r18 +(p7) dep r17=r17,r19,(PAGE_SHIFT-3),3 // put region number bits in place + + srlz.d + LOAD_PHYSICAL(p6, r19, swapper_pg_dir) // region 5 is rooted at swapper_pg_dir + + .pred.rel "mutex", p6, p7 +(p6) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT +(p7) shr.u r21=r21,PGDIR_SHIFT+PAGE_SHIFT-3 + ;; +(p6) dep r17=r18,r19,3,(PAGE_SHIFT-3) // r17=pgd_offset for region 5 +(p7) dep r17=r18,r17,3,(PAGE_SHIFT-6) // r17=pgd_offset for region[0-4] + cmp.eq p7,p6=0,r21 // unused address bits all zeroes? +#if CONFIG_PGTABLE_LEVELS == 4 + shr.u r18=r22,PUD_SHIFT // shift pud index into position +#else + shr.u r18=r22,PMD_SHIFT // shift pmd index into position +#endif + ;; + ld8 r17=[r17] // get *pgd (may be 0) + ;; +(p7) cmp.eq p6,p7=r17,r0 // was pgd_present(*pgd) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=p[u|m]d_offset(pgd,addr) + ;; +#if CONFIG_PGTABLE_LEVELS == 4 +(p7) ld8 r17=[r17] // get *pud (may be 0) + shr.u r18=r22,PMD_SHIFT // shift pmd index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pud_present(*pud) == NULL? + dep r17=r18,r17,3,(PAGE_SHIFT-3) // r17=pmd_offset(pud,addr) + ;; +#endif +(p7) ld8 r17=[r17] // get *pmd (may be 0) + shr.u r19=r22,PAGE_SHIFT // shift pte index into position + ;; +(p7) cmp.eq.or.andcm p6,p7=r17,r0 // was pmd_present(*pmd) == NULL? + dep r17=r19,r17,3,(PAGE_SHIFT-3) // r17=pte_offset(pmd,addr); +(p6) br.cond.spnt page_fault + mov b0=r30 + br.sptk.many b0 // return to continuation point +END(nested_dtlb_miss) + + .org ia64_ivt+0x1800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1800 Entry 6 (size 64 bundles) Instruction Key Miss (24) +ENTRY(ikey_miss) + DBG_FAULT(6) + FAULT(6) +END(ikey_miss) + + .org ia64_ivt+0x1c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x1c00 Entry 7 (size 64 bundles) Data Key Miss (12,51) +ENTRY(dkey_miss) + DBG_FAULT(7) + FAULT(7) +END(dkey_miss) + + .org ia64_ivt+0x2000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2000 Entry 8 (size 64 bundles) Dirty-bit (54) +ENTRY(dirty_bit) + DBG_FAULT(8) + /* + * What we do here is to simply turn on the dirty bit in the PTE. We need to + * update both the page-table and the TLB entry. To efficiently access the PTE, + * we address it through the virtual page table. Most likely, the TLB entry for + * the relevant virtual page table page is still present in the TLB so we can + * normally do this without additional TLB misses. In case the necessary virtual + * page table TLB entry isn't present, we take a nested TLB miss hit where we look + * up the physical address of the L3 PTE and then continue at label 1 below. + */ + MOV_FROM_IFA(r16) // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault + mov r31=pr // save pr +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only update if page is present + mov r24=PAGE_SHIFT<<2 + ;; +(p6) cmp.eq p6,p7=r26,r18 // Only compare if page is present + ;; + ITC_D(p6, r25, r18) // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_D|_PAGE_A,r18 // set the dirty and accessed bits + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + ITC_D(p0, r18, r16) // install updated PTE +#endif + mov pr=r31,-1 // restore pr + RFI +END(dirty_bit) + + .org ia64_ivt+0x2400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2400 Entry 9 (size 64 bundles) Instruction Access-bit (27) +ENTRY(iaccess_bit) + DBG_FAULT(9) + // Like Entry 8, except for instruction access + MOV_FROM_IFA(r16) // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + mov r31=pr // save predicates +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) has "NoFix" status. + */ + MOV_FROM_IPSR(p0, r17) + ;; + MOV_FROM_IIP(r18) + tbit.z p6,p0=r17,IA64_PSR_IS_BIT // IA64 instruction set? + ;; +(p6) mov r16=r18 // if so, use cr.iip instead of cr.ifa +#endif /* CONFIG_ITANIUM */ + ;; + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the accessed bit + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page present + mov r24=PAGE_SHIFT<<2 + ;; +(p6) cmp.eq p6,p7=r26,r18 // Only if page present + ;; + ITC_I(p6, r25, r26) // install updated PTE + ;; + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov b0=r29 // restore b0 + mov ar.ccv=r28 +#else /* !CONFIG_SMP */ + ;; +1: ld8 r18=[r17] + ;; + or r18=_PAGE_A,r18 // set the accessed bit + mov b0=r29 // restore b0 + ;; + st8 [r17]=r18 // store back updated PTE + ITC_I(p0, r18, r16) // install updated PTE +#endif /* !CONFIG_SMP */ + mov pr=r31,-1 + RFI +END(iaccess_bit) + + .org ia64_ivt+0x2800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2800 Entry 10 (size 64 bundles) Data Access-bit (15,55) +ENTRY(daccess_bit) + DBG_FAULT(10) + // Like Entry 8, except for data access + MOV_FROM_IFA(r16) // get the address that caused the fault + movl r30=1f // load continuation point in case of nested fault + ;; + THASH(p0, r17, r16, r18) // compute virtual address of L3 PTE + mov r31=pr + mov r29=b0 // save b0 in case of nested fault) +#ifdef CONFIG_SMP + mov r28=ar.ccv // save ar.ccv + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + mov ar.ccv=r18 // set compare value for cmpxchg + or r25=_PAGE_A,r18 // set the dirty bit + tbit.z p7,p6 = r18,_PAGE_P_BIT // Check present bit + ;; +(p6) cmpxchg8.acq r26=[r17],r25,ar.ccv // Only if page is present + mov r24=PAGE_SHIFT<<2 + ;; +(p6) cmp.eq p6,p7=r26,r18 // Only if page is present + ;; + ITC_D(p6, r25, r26) // install updated PTE + /* + * Tell the assemblers dependency-violation checker that the above "itc" instructions + * cannot possibly affect the following loads: + */ + dv_serialize_data + ;; + ld8 r18=[r17] // read PTE again + ;; + cmp.eq p6,p7=r18,r25 // is it same as the newly installed + ;; +(p7) ptc.l r16,r24 + mov ar.ccv=r28 +#else + ;; +1: ld8 r18=[r17] + ;; // avoid RAW on r18 + or r18=_PAGE_A,r18 // set the accessed bit + ;; + st8 [r17]=r18 // store back updated PTE + ITC_D(p0, r18, r16) // install updated PTE +#endif + mov b0=r29 // restore b0 + mov pr=r31,-1 + RFI +END(daccess_bit) + + .org ia64_ivt+0x2c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x2c00 Entry 11 (size 64 bundles) Break instruction (33) +ENTRY(break_fault) + /* + * The streamlined system call entry/exit paths only save/restore the initial part + * of pt_regs. This implies that the callers of system-calls must adhere to the + * normal procedure calling conventions. + * + * Registers to be saved & restored: + * CR registers: cr.ipsr, cr.iip, cr.ifs + * AR registers: ar.unat, ar.pfs, ar.rsc, ar.rnat, ar.bspstore, ar.fpsr + * others: pr, b0, b6, loadrs, r1, r11, r12, r13, r15 + * Registers to be restored only: + * r8-r11: output value from the system call. + * + * During system call exit, scratch registers (including r15) are modified/cleared + * to prevent leaking bits from kernel to user level. + */ + DBG_FAULT(11) + mov.m r16=IA64_KR(CURRENT) // M2 r16 <- current task (12 cyc) + MOV_FROM_IPSR(p0, r29) // M2 (12 cyc) + mov r31=pr // I0 (2 cyc) + + MOV_FROM_IIM(r17) // M2 (2 cyc) + mov.m r27=ar.rsc // M2 (12 cyc) + mov r18=__IA64_BREAK_SYSCALL // A + + mov.m ar.rsc=0 // M2 + mov.m r21=ar.fpsr // M2 (12 cyc) + mov r19=b6 // I0 (2 cyc) + ;; + mov.m r23=ar.bspstore // M2 (12 cyc) + mov.m r24=ar.rnat // M2 (5 cyc) + mov.i r26=ar.pfs // I0 (2 cyc) + + invala // M0|1 + nop.m 0 // M + mov r20=r1 // A save r1 + + nop.m 0 + movl r30=sys_call_table // X + + MOV_FROM_IIP(r28) // M2 (2 cyc) + cmp.eq p0,p7=r18,r17 // I0 is this a system call? +(p7) br.cond.spnt non_syscall // B no -> + // + // From this point on, we are definitely on the syscall-path + // and we can use (non-banked) scratch registers. + // +/////////////////////////////////////////////////////////////////////// + mov r1=r16 // A move task-pointer to "addl"-addressable reg + mov r2=r16 // A setup r2 for ia64_syscall_setup + add r9=TI_FLAGS+IA64_TASK_SIZE,r16 // A r9 = ¤t_thread_info()->flags + + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 + adds r15=-1024,r15 // A subtract 1024 from syscall number + mov r3=NR_syscalls - 1 + ;; + ld1.bias r17=[r16] // M0|1 r17 = current->thread.on_ustack flag + ld4 r9=[r9] // M0|1 r9 = current_thread_info()->flags + extr.u r8=r29,41,2 // I0 extract ei field from cr.ipsr + + shladd r30=r15,3,r30 // A r30 = sys_call_table + 8*(syscall-1024) + addl r22=IA64_RBS_OFFSET,r1 // A compute base of RBS + cmp.leu p6,p7=r15,r3 // A syscall number in range? + ;; + + lfetch.fault.excl.nt1 [r22] // M0|1 prefetch RBS +(p6) ld8 r30=[r30] // M0|1 load address of syscall entry point + tnat.nz.or p7,p0=r15 // I0 is syscall nr a NaT? + + mov.m ar.bspstore=r22 // M2 switch to kernel RBS + cmp.eq p8,p9=2,r8 // A isr.ei==2? + ;; + +(p8) mov r8=0 // A clear ei to 0 +(p7) movl r30=sys_ni_syscall // X + +(p8) adds r28=16,r28 // A switch cr.iip to next bundle +(p9) adds r8=1,r8 // A increment ei to next slot +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + ;; + mov b6=r30 // I0 setup syscall handler branch reg early +#else + nop.i 0 + ;; +#endif + + mov.m r25=ar.unat // M2 (5 cyc) + dep r29=r8,r29,41,2 // I0 insert new ei into cr.ipsr + adds r15=1024,r15 // A restore original syscall number + // + // If any of the above loads miss in L1D, we'll stall here until + // the data arrives. + // +/////////////////////////////////////////////////////////////////////// + st1 [r16]=r0 // M2|3 clear current->thread.on_ustack flag +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + MOV_FROM_ITC(p0, p14, r30, r18) // M get cycle for accounting +#else + mov b6=r30 // I0 setup syscall handler branch reg early +#endif + cmp.eq pKStk,pUStk=r0,r17 // A were we on kernel stacks already? + + and r9=_TIF_SYSCALL_TRACEAUDIT,r9 // A mask trace or audit + mov r18=ar.bsp // M2 (12 cyc) +(pKStk) br.cond.spnt .break_fixup // B we're already in kernel-mode -- fix up RBS + ;; +.back_from_break_fixup: +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1 // A compute base of memory stack + cmp.eq p14,p0=r9,r0 // A are syscalls being traced/audited? + br.call.sptk.many b7=ia64_syscall_setup // B +1: +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + // mov.m r30=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 // A + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 // A +(pKStk) br.cond.spnt .skip_accounting // B unlikely skip + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // M get last stamp + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // M time at leave + ;; + ld8 r20=[r16],TI_AC_STAMP-TI_AC_STIME // M cumulated stime + ld8 r21=[r17] // M cumulated utime + sub r22=r19,r18 // A stime before leave + ;; + st8 [r16]=r30,TI_AC_STIME-TI_AC_STAMP // M update stamp + sub r18=r30,r19 // A elapsed time in user + ;; + add r20=r20,r22 // A sum stime + add r21=r21,r18 // A sum utime + ;; + st8 [r16]=r20 // M update stime + st8 [r17]=r21 // M update utime + ;; +.skip_accounting: +#endif + mov ar.rsc=0x3 // M2 set eager mode, pl 0, LE, loadrs=0 + nop 0 + BSW_1(r2, r14) // B (6 cyc) regs are saved, switch to bank 1 + ;; + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r16) // M2 now it's safe to re-enable intr.-collection + // M0 ensure interruption collection is on + movl r3=ia64_ret_from_syscall // X + ;; + mov rp=r3 // I0 set the real return addr +(p10) br.cond.spnt.many ia64_ret_from_syscall // B return if bad call-frame or r15 is a NaT + + SSM_PSR_I(p15, p15, r16) // M2 restore psr.i +(p14) br.call.sptk.many b6=b6 // B invoke syscall-handker (ignore return addr) + br.cond.spnt.many ia64_trace_syscall // B do syscall-tracing thingamagic + // NOT REACHED +/////////////////////////////////////////////////////////////////////// + // On entry, we optimistically assumed that we're coming from user-space. + // For the rare cases where a system-call is done from within the kernel, + // we fix things up at this point: +.break_fixup: + add r1=-IA64_PT_REGS_SIZE,sp // A allocate space for pt_regs structure + mov ar.rnat=r24 // M2 restore kernel's AR.RNAT + ;; + mov ar.bspstore=r23 // M2 restore kernel's AR.BSPSTORE + br.cond.sptk .back_from_break_fixup +END(break_fault) + + .org ia64_ivt+0x3000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3000 Entry 12 (size 64 bundles) External Interrupt (4) +ENTRY(interrupt) + /* interrupt handler has become too big to fit this area. */ + br.sptk.many __interrupt +END(interrupt) + + .org ia64_ivt+0x3400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3400 Entry 13 (size 64 bundles) Reserved + DBG_FAULT(13) + FAULT(13) + + .org ia64_ivt+0x3800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3800 Entry 14 (size 64 bundles) Reserved + DBG_FAULT(14) + FAULT(14) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + * + * ia64_syscall_setup() is a separate subroutine so that it can + * allocate stacked registers so it can safely demine any + * potential NaT values from the input registers. + * + * On entry: + * - executing on bank 0 or bank 1 register set (doesn't matter) + * - r1: stack pointer + * - r2: current task pointer + * - r3: preserved + * - r11: original contents (saved ar.pfs to be saved) + * - r12: original contents (sp to be saved) + * - r13: original contents (tp to be saved) + * - r15: original contents (syscall # to be saved) + * - r18: saved bsp (after switching to kernel stack) + * - r19: saved b6 + * - r20: saved r1 (gp) + * - r21: saved ar.fpsr + * - r22: kernel's register backing store base (krbs_base) + * - r23: saved ar.bspstore + * - r24: saved ar.rnat + * - r25: saved ar.unat + * - r26: saved ar.pfs + * - r27: saved ar.rsc + * - r28: saved cr.iip + * - r29: saved cr.ipsr + * - r30: ar.itc for accounting (don't touch) + * - r31: saved pr + * - b0: original contents (to be saved) + * On exit: + * - p10: TRUE if syscall is invoked with more than 8 out + * registers or r15's Nat is true + * - r1: kernel's gp + * - r3: preserved (same as on entry) + * - r8: -EINVAL if p10 is true + * - r12: points to kernel stack + * - r13: points to current task + * - r14: preserved (same as on entry) + * - p13: preserved + * - p15: TRUE if interrupts need to be re-enabled + * - ar.fpsr: set to kernel settings + * - b6: preserved (same as on entry) + */ +GLOBAL_ENTRY(ia64_syscall_setup) +#if PT(B6) != 0 +# error This code assumes that b6 is the first field in pt_regs. +#endif + st8 [r1]=r19 // save b6 + add r16=PT(CR_IPSR),r1 // initialize first base pointer + add r17=PT(R11),r1 // initialize second base pointer + ;; + alloc r19=ar.pfs,8,0,0,0 // ensure in0-in7 are writable + st8 [r16]=r29,PT(AR_PFS)-PT(CR_IPSR) // save cr.ipsr + tnat.nz p8,p0=in0 + + st8.spill [r17]=r11,PT(CR_IIP)-PT(R11) // save r11 + tnat.nz p9,p0=in1 +(pKStk) mov r18=r0 // make sure r18 isn't NaT + ;; + + st8 [r16]=r26,PT(CR_IFS)-PT(AR_PFS) // save ar.pfs + st8 [r17]=r28,PT(AR_UNAT)-PT(CR_IIP) // save cr.iip + mov r28=b0 // save b0 (2 cyc) + ;; + + st8 [r17]=r25,PT(AR_RSC)-PT(AR_UNAT) // save ar.unat + dep r19=0,r19,38,26 // clear all bits but 0..37 [I0] +(p8) mov in0=-1 + ;; + + st8 [r16]=r19,PT(AR_RNAT)-PT(CR_IFS) // store ar.pfs.pfm in cr.ifs + extr.u r11=r19,7,7 // I0 // get sol of ar.pfs + and r8=0x7f,r19 // A // get sof of ar.pfs + + st8 [r17]=r27,PT(AR_BSPSTORE)-PT(AR_RSC)// save ar.rsc + tbit.nz p15,p0=r29,IA64_PSR_I_BIT // I0 +(p9) mov in1=-1 + ;; + +(pUStk) sub r18=r18,r22 // r18=RSE.ndirty*8 + tnat.nz p10,p0=in2 + add r11=8,r11 + ;; +(pKStk) adds r16=PT(PR)-PT(AR_RNAT),r16 // skip over ar_rnat field +(pKStk) adds r17=PT(B0)-PT(AR_BSPSTORE),r17 // skip over ar_bspstore field + tnat.nz p11,p0=in3 + ;; +(p10) mov in2=-1 + tnat.nz p12,p0=in4 // [I0] +(p11) mov in3=-1 + ;; +(pUStk) st8 [r16]=r24,PT(PR)-PT(AR_RNAT) // save ar.rnat +(pUStk) st8 [r17]=r23,PT(B0)-PT(AR_BSPSTORE) // save ar.bspstore + shl r18=r18,16 // compute ar.rsc to be used for "loadrs" + ;; + st8 [r16]=r31,PT(LOADRS)-PT(PR) // save predicates + st8 [r17]=r28,PT(R1)-PT(B0) // save b0 + tnat.nz p13,p0=in5 // [I0] + ;; + st8 [r16]=r18,PT(R12)-PT(LOADRS) // save ar.rsc value for "loadrs" + st8.spill [r17]=r20,PT(R13)-PT(R1) // save original r1 +(p12) mov in4=-1 + ;; + +.mem.offset 0,0; st8.spill [r16]=r12,PT(AR_FPSR)-PT(R12) // save r12 +.mem.offset 8,0; st8.spill [r17]=r13,PT(R15)-PT(R13) // save r13 +(p13) mov in5=-1 + ;; + st8 [r16]=r21,PT(R8)-PT(AR_FPSR) // save ar.fpsr + tnat.nz p13,p0=in6 + cmp.lt p10,p9=r11,r8 // frame size can't be more than local+8 + ;; + mov r8=1 +(p9) tnat.nz p10,p0=r15 + adds r12=-16,r1 // switch to kernel memory stack (with 16 bytes of scratch) + + st8.spill [r17]=r15 // save r15 + tnat.nz p8,p0=in7 + nop.i 0 + + mov r13=r2 // establish `current' + movl r1=__gp // establish kernel global pointer + ;; + st8 [r16]=r8 // ensure pt_regs.r8 != 0 (see handle_syscall_error) +(p13) mov in6=-1 +(p8) mov in7=-1 + + cmp.eq pSys,pNonSys=r0,r0 // set pSys=1, pNonSys=0 + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value +(p10) mov r8=-EINVAL + br.ret.sptk.many b7 +END(ia64_syscall_setup) + + .org ia64_ivt+0x3c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x3c00 Entry 15 (size 64 bundles) Reserved + DBG_FAULT(15) + FAULT(15) + + .org ia64_ivt+0x4000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4000 Entry 16 (size 64 bundles) Reserved + DBG_FAULT(16) + FAULT(16) + +#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) + /* + * There is no particular reason for this code to be here, other than + * that there happens to be space here that would go unused otherwise. + * If this fault ever gets "unreserved", simply moved the following + * code to a more suitable spot... + * + * account_sys_enter is called from SAVE_MIN* macros if accounting is + * enabled and if the macro is entered from user mode. + */ +GLOBAL_ENTRY(account_sys_enter) + // mov.m r20=ar.itc is called in advance, and r13 is current + add r16=TI_AC_STAMP+IA64_TASK_SIZE,r13 + add r17=TI_AC_LEAVE+IA64_TASK_SIZE,r13 + ;; + ld8 r18=[r16],TI_AC_STIME-TI_AC_STAMP // time at last check in kernel + ld8 r19=[r17],TI_AC_UTIME-TI_AC_LEAVE // time at left from kernel + ;; + ld8 r23=[r16],TI_AC_STAMP-TI_AC_STIME // cumulated stime + ld8 r21=[r17] // cumulated utime + sub r22=r19,r18 // stime before leave kernel + ;; + st8 [r16]=r20,TI_AC_STIME-TI_AC_STAMP // update stamp + sub r18=r20,r19 // elapsed time in user mode + ;; + add r23=r23,r22 // sum stime + add r21=r21,r18 // sum utime + ;; + st8 [r16]=r23 // update stime + st8 [r17]=r21 // update utime + ;; + br.ret.sptk.many rp +END(account_sys_enter) +#endif + + .org ia64_ivt+0x4400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4400 Entry 17 (size 64 bundles) Reserved + DBG_FAULT(17) + FAULT(17) + + .org ia64_ivt+0x4800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4800 Entry 18 (size 64 bundles) Reserved + DBG_FAULT(18) + FAULT(18) + + .org ia64_ivt+0x4c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x4c00 Entry 19 (size 64 bundles) Reserved + DBG_FAULT(19) + FAULT(19) + +// +// --- End of long entries, Beginning of short entries +// + + .org ia64_ivt+0x5000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5000 Entry 20 (size 16 bundles) Page Not Present (10,22,49) +ENTRY(page_not_present) + DBG_FAULT(20) + MOV_FROM_IFA(r16) + RSM_PSR_DT + /* + * The Linux page fault handler doesn't expect non-present pages to be in + * the TLB. Flush the existing entry now, so we meet that expectation. + */ + mov r17=PAGE_SHIFT<<2 + ;; + ptc.l r16,r17 + ;; + mov r31=pr + srlz.d + br.sptk.many page_fault +END(page_not_present) + + .org ia64_ivt+0x5100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5100 Entry 21 (size 16 bundles) Key Permission (13,25,52) +ENTRY(key_permission) + DBG_FAULT(21) + MOV_FROM_IFA(r16) + RSM_PSR_DT + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(key_permission) + + .org ia64_ivt+0x5200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5200 Entry 22 (size 16 bundles) Instruction Access Rights (26) +ENTRY(iaccess_rights) + DBG_FAULT(22) + MOV_FROM_IFA(r16) + RSM_PSR_DT + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(iaccess_rights) + + .org ia64_ivt+0x5300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5300 Entry 23 (size 16 bundles) Data Access Rights (14,53) +ENTRY(daccess_rights) + DBG_FAULT(23) + MOV_FROM_IFA(r16) + RSM_PSR_DT + mov r31=pr + ;; + srlz.d + br.sptk.many page_fault +END(daccess_rights) + + .org ia64_ivt+0x5400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5400 Entry 24 (size 16 bundles) General Exception (5,32,34,36,38,39) +ENTRY(general_exception) + DBG_FAULT(24) + MOV_FROM_ISR(r16) + mov r31=pr + ;; + cmp4.eq p6,p0=0,r16 +(p6) br.sptk.many dispatch_illegal_op_fault + ;; + mov r19=24 // fault number + br.sptk.many dispatch_to_fault_handler +END(general_exception) + + .org ia64_ivt+0x5500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5500 Entry 25 (size 16 bundles) Disabled FP-Register (35) +ENTRY(disabled_fp_reg) + DBG_FAULT(25) + rsm psr.dfh // ensure we can access fph + ;; + srlz.d + mov r31=pr + mov r19=25 + br.sptk.many dispatch_to_fault_handler +END(disabled_fp_reg) + + .org ia64_ivt+0x5600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5600 Entry 26 (size 16 bundles) Nat Consumption (11,23,37,50) +ENTRY(nat_consumption) + DBG_FAULT(26) + + MOV_FROM_IPSR(p0, r16) + MOV_FROM_ISR(r17) + mov r31=pr // save PR + ;; + and r18=0xf,r17 // r18 = cr.ipsr.code{3:0} + tbit.z p6,p0=r17,IA64_ISR_NA_BIT + ;; + cmp.ne.or p6,p0=IA64_ISR_CODE_LFETCH,r18 + dep r16=-1,r16,IA64_PSR_ED_BIT,1 +(p6) br.cond.spnt 1f // branch if (cr.ispr.na == 0 || cr.ipsr.code{3:0} != LFETCH) + ;; + MOV_TO_IPSR(p0, r16, r18) + mov pr=r31,-1 + ;; + RFI + +1: mov pr=r31,-1 + ;; + FAULT(26) +END(nat_consumption) + + .org ia64_ivt+0x5700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5700 Entry 27 (size 16 bundles) Speculation (40) +ENTRY(speculation_vector) + DBG_FAULT(27) + /* + * A [f]chk.[as] instruction needs to take the branch to the recovery code but + * this part of the architecture is not implemented in hardware on some CPUs, such + * as Itanium. Thus, in general we need to emulate the behavior. IIM contains + * the relative target (not yet sign extended). So after sign extending it we + * simply add it to IIP. We also need to reset the EI field of the IPSR to zero, + * i.e., the slot to restart into. + * + * cr.imm contains zero_ext(imm21) + */ + MOV_FROM_IIM(r18) + ;; + MOV_FROM_IIP(r17) + shl r18=r18,43 // put sign bit in position (43=64-21) + ;; + + MOV_FROM_IPSR(p0, r16) + shr r18=r18,39 // sign extend (39=43-4) + ;; + + add r17=r17,r18 // now add the offset + ;; + MOV_TO_IIP(r17, r19) + dep r16=0,r16,41,2 // clear EI + ;; + + MOV_TO_IPSR(p0, r16, r19) + ;; + + RFI +END(speculation_vector) + + .org ia64_ivt+0x5800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5800 Entry 28 (size 16 bundles) Reserved + DBG_FAULT(28) + FAULT(28) + + .org ia64_ivt+0x5900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5900 Entry 29 (size 16 bundles) Debug (16,28,56) +ENTRY(debug_vector) + DBG_FAULT(29) + FAULT(29) +END(debug_vector) + + .org ia64_ivt+0x5a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5a00 Entry 30 (size 16 bundles) Unaligned Reference (57) +ENTRY(unaligned_access) + DBG_FAULT(30) + mov r31=pr // prepare to save predicates + ;; + br.sptk.many dispatch_unaligned_handler +END(unaligned_access) + + .org ia64_ivt+0x5b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5b00 Entry 31 (size 16 bundles) Unsupported Data Reference (57) +ENTRY(unsupported_data_reference) + DBG_FAULT(31) + FAULT(31) +END(unsupported_data_reference) + + .org ia64_ivt+0x5c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5c00 Entry 32 (size 16 bundles) Floating-Point Fault (64) +ENTRY(floating_point_fault) + DBG_FAULT(32) + FAULT(32) +END(floating_point_fault) + + .org ia64_ivt+0x5d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5d00 Entry 33 (size 16 bundles) Floating Point Trap (66) +ENTRY(floating_point_trap) + DBG_FAULT(33) + FAULT(33) +END(floating_point_trap) + + .org ia64_ivt+0x5e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5e00 Entry 34 (size 16 bundles) Lower Privilege Transfer Trap (66) +ENTRY(lower_privilege_trap) + DBG_FAULT(34) + FAULT(34) +END(lower_privilege_trap) + + .org ia64_ivt+0x5f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x5f00 Entry 35 (size 16 bundles) Taken Branch Trap (68) +ENTRY(taken_branch_trap) + DBG_FAULT(35) + FAULT(35) +END(taken_branch_trap) + + .org ia64_ivt+0x6000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6000 Entry 36 (size 16 bundles) Single Step Trap (69) +ENTRY(single_step_trap) + DBG_FAULT(36) + FAULT(36) +END(single_step_trap) + + .org ia64_ivt+0x6100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6100 Entry 37 (size 16 bundles) Reserved + DBG_FAULT(37) + FAULT(37) + + .org ia64_ivt+0x6200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6200 Entry 38 (size 16 bundles) Reserved + DBG_FAULT(38) + FAULT(38) + + .org ia64_ivt+0x6300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6300 Entry 39 (size 16 bundles) Reserved + DBG_FAULT(39) + FAULT(39) + + .org ia64_ivt+0x6400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6400 Entry 40 (size 16 bundles) Reserved + DBG_FAULT(40) + FAULT(40) + + .org ia64_ivt+0x6500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6500 Entry 41 (size 16 bundles) Reserved + DBG_FAULT(41) + FAULT(41) + + .org ia64_ivt+0x6600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6600 Entry 42 (size 16 bundles) Reserved + DBG_FAULT(42) + FAULT(42) + + .org ia64_ivt+0x6700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6700 Entry 43 (size 16 bundles) Reserved + DBG_FAULT(43) + FAULT(43) + + .org ia64_ivt+0x6800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6800 Entry 44 (size 16 bundles) Reserved + DBG_FAULT(44) + FAULT(44) + + .org ia64_ivt+0x6900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6900 Entry 45 (size 16 bundles) IA-32 Exeception (17,18,29,41,42,43,44,58,60,61,62,72,73,75,76,77) +ENTRY(ia32_exception) + DBG_FAULT(45) + FAULT(45) +END(ia32_exception) + + .org ia64_ivt+0x6a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6a00 Entry 46 (size 16 bundles) IA-32 Intercept (30,31,59,70,71) +ENTRY(ia32_intercept) + DBG_FAULT(46) + FAULT(46) +END(ia32_intercept) + + .org ia64_ivt+0x6b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6b00 Entry 47 (size 16 bundles) IA-32 Interrupt (74) +ENTRY(ia32_interrupt) + DBG_FAULT(47) + FAULT(47) +END(ia32_interrupt) + + .org ia64_ivt+0x6c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6c00 Entry 48 (size 16 bundles) Reserved + DBG_FAULT(48) + FAULT(48) + + .org ia64_ivt+0x6d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6d00 Entry 49 (size 16 bundles) Reserved + DBG_FAULT(49) + FAULT(49) + + .org ia64_ivt+0x6e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6e00 Entry 50 (size 16 bundles) Reserved + DBG_FAULT(50) + FAULT(50) + + .org ia64_ivt+0x6f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x6f00 Entry 51 (size 16 bundles) Reserved + DBG_FAULT(51) + FAULT(51) + + .org ia64_ivt+0x7000 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7000 Entry 52 (size 16 bundles) Reserved + DBG_FAULT(52) + FAULT(52) + + .org ia64_ivt+0x7100 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7100 Entry 53 (size 16 bundles) Reserved + DBG_FAULT(53) + FAULT(53) + + .org ia64_ivt+0x7200 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7200 Entry 54 (size 16 bundles) Reserved + DBG_FAULT(54) + FAULT(54) + + .org ia64_ivt+0x7300 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7300 Entry 55 (size 16 bundles) Reserved + DBG_FAULT(55) + FAULT(55) + + .org ia64_ivt+0x7400 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7400 Entry 56 (size 16 bundles) Reserved + DBG_FAULT(56) + FAULT(56) + + .org ia64_ivt+0x7500 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7500 Entry 57 (size 16 bundles) Reserved + DBG_FAULT(57) + FAULT(57) + + .org ia64_ivt+0x7600 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7600 Entry 58 (size 16 bundles) Reserved + DBG_FAULT(58) + FAULT(58) + + .org ia64_ivt+0x7700 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7700 Entry 59 (size 16 bundles) Reserved + DBG_FAULT(59) + FAULT(59) + + .org ia64_ivt+0x7800 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7800 Entry 60 (size 16 bundles) Reserved + DBG_FAULT(60) + FAULT(60) + + .org ia64_ivt+0x7900 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7900 Entry 61 (size 16 bundles) Reserved + DBG_FAULT(61) + FAULT(61) + + .org ia64_ivt+0x7a00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7a00 Entry 62 (size 16 bundles) Reserved + DBG_FAULT(62) + FAULT(62) + + .org ia64_ivt+0x7b00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7b00 Entry 63 (size 16 bundles) Reserved + DBG_FAULT(63) + FAULT(63) + + .org ia64_ivt+0x7c00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7c00 Entry 64 (size 16 bundles) Reserved + DBG_FAULT(64) + FAULT(64) + + .org ia64_ivt+0x7d00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7d00 Entry 65 (size 16 bundles) Reserved + DBG_FAULT(65) + FAULT(65) + + .org ia64_ivt+0x7e00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7e00 Entry 66 (size 16 bundles) Reserved + DBG_FAULT(66) + FAULT(66) + + .org ia64_ivt+0x7f00 +///////////////////////////////////////////////////////////////////////////////////////// +// 0x7f00 Entry 67 (size 16 bundles) Reserved + DBG_FAULT(67) + FAULT(67) + + //----------------------------------------------------------------------------------- + // call do_page_fault (predicates are in r31, psr.dt may be off, r16 is faulting address) +ENTRY(page_fault) + SSM_PSR_DT_AND_SRLZ_I + ;; + SAVE_MIN_WITH_COVER + alloc r15=ar.pfs,0,0,3,0 + MOV_FROM_IFA(out0) + MOV_FROM_ISR(out1) + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r14, r3) + adds r3=8,r2 // set up second base pointer + SSM_PSR_I(p15, p15, r14) // restore psr.i + movl r14=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r14 + ;; + adds out2=16,r12 // out2 = pointer to pt_regs + br.call.sptk.many b6=ia64_do_page_fault // ignore return address +END(page_fault) + +ENTRY(non_syscall) + mov ar.rsc=r27 // restore ar.rsc before SAVE_MIN_WITH_COVER + ;; + SAVE_MIN_WITH_COVER + + // There is no particular reason for this code to be here, other than that + // there happens to be space here that would go unused otherwise. If this + // fault ever gets "unreserved", simply moved the following code to a more + // suitable spot... + + alloc r14=ar.pfs,0,0,2,0 + MOV_FROM_IIM(out0) + add out1=16,sp + adds r3=8,r2 // set up second base pointer for SAVE_REST + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r15, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r15) // restore psr.i + movl r15=ia64_leave_kernel + ;; + SAVE_REST + mov rp=r15 + ;; + br.call.sptk.many b6=ia64_bad_break // avoid WAW on CFM and ignore return addr +END(non_syscall) + +ENTRY(__interrupt) + DBG_FAULT(12) + mov r31=pr // prepare to save predicates + ;; + SAVE_MIN_WITH_COVER // uses r31; defines r2 and r3 + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r14) + // ensure everybody knows psr.ic is back on + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + ;; + MCA_RECOVER_RANGE(interrupt) + alloc r14=ar.pfs,0,0,2,0 // must be first in an insn group + MOV_FROM_IVR(out0, r8) // pass cr.ivr as first arg + add out1=16,sp // pass pointer to pt_regs as second arg + ;; + srlz.d // make sure we see the effect of cr.ivr + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_handle_irq +END(__interrupt) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_unaligned_handler) + SAVE_MIN_WITH_COVER + ;; + alloc r14=ar.pfs,0,0,2,0 // now it's safe (must be first in insn group!) + MOV_FROM_IFA(out0) + adds out1=16,sp + + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.sptk.many ia64_prepare_handle_unaligned +END(dispatch_unaligned_handler) + + /* + * There is no particular reason for this code to be here, other than that + * there happens to be space here that would go unused otherwise. If this + * fault ever gets "unreserved", simply moved the following code to a more + * suitable spot... + */ + +ENTRY(dispatch_to_fault_handler) + /* + * Input: + * psr.ic: off + * r19: fault vector number (e.g., 24 for General Exception) + * r31: contains saved predicates (pr) + */ + SAVE_MIN_WITH_COVER_R19 + alloc r14=ar.pfs,0,0,5,0 + MOV_FROM_ISR(out1) + MOV_FROM_IFA(out2) + MOV_FROM_IIM(out3) + MOV_FROM_ITIR(out4) + ;; + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, out0) + // guarantee that interruption collection is on + mov out0=r15 + ;; + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + SAVE_REST + movl r14=ia64_leave_kernel + ;; + mov rp=r14 + br.call.sptk.many b6=ia64_fault +END(dispatch_to_fault_handler) + + /* + * Squatting in this space ... + * + * This special case dispatcher for illegal operation faults allows preserved + * registers to be modified through a callback function (asm only) that is handed + * back from the fault handler in r8. Up to three arguments can be passed to the + * callback function by returning an aggregate with the callback as its first + * element, followed by the arguments. + */ +ENTRY(dispatch_illegal_op_fault) + .prologue + .body + SAVE_MIN_WITH_COVER + SSM_PSR_IC_AND_DEFAULT_BITS_AND_SRLZ_I(r3, r24) + // guarantee that interruption collection is on + ;; + SSM_PSR_I(p15, p15, r3) // restore psr.i + adds r3=8,r2 // set up second base pointer for SAVE_REST + ;; + alloc r14=ar.pfs,0,0,1,0 // must be first in insn group + mov out0=ar.ec + ;; + SAVE_REST + PT_REGS_UNWIND_INFO(0) + ;; + br.call.sptk.many rp=ia64_illegal_op_fault +.ret0: ;; + alloc r14=ar.pfs,0,0,3,0 // must be first in insn group + mov out0=r9 + mov out1=r10 + mov out2=r11 + movl r15=ia64_leave_kernel + ;; + mov rp=r15 + mov b6=r8 + ;; + cmp.ne p6,p0=0,r8 +(p6) br.call.dpnt.many b6=b6 // call returns to ia64_leave_kernel + br.sptk.many ia64_leave_kernel +END(dispatch_illegal_op_fault) diff --git a/arch/ia64/kernel/kprobes.c b/arch/ia64/kernel/kprobes.c new file mode 100644 index 000000000000..ca34e51e84b4 --- /dev/null +++ b/arch/ia64/kernel/kprobes.c @@ -0,0 +1,911 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Kernel Probes (KProbes) + * arch/ia64/kernel/kprobes.c + * + * Copyright (C) IBM Corporation, 2002, 2004 + * Copyright (C) Intel Corporation, 2005 + * + * 2005-Apr Rusty Lynch and Anil S Keshavamurthy + * adapted from i386 + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; +DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); + +struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}}; + +enum instruction_type {A, I, M, F, B, L, X, u}; +static enum instruction_type bundle_encoding[32][3] = { + [0x00] = { M, I, I }, + [0x01] = { M, I, I }, + [0x02] = { M, I, I }, + [0x03] = { M, I, I }, + [0x04] = { M, L, X }, + [0x05] = { M, L, X }, + [0x06] = { u, u, u }, + [0x07] = { u, u, u }, + [0x08] = { M, M, I }, + [0x09] = { M, M, I }, + [0x0A] = { M, M, I }, + [0x0B] = { M, M, I }, + [0x0C] = { M, F, I }, + [0x0D] = { M, F, I }, + [0x0E] = { M, M, F }, + [0x0F] = { M, M, F }, + [0x10] = { M, I, B }, + [0x11] = { M, I, B }, + [0x12] = { M, B, B }, + [0x13] = { M, B, B }, + [0x14] = { u, u, u }, + [0x15] = { u, u, u }, + [0x16] = { B, B, B }, + [0x17] = { B, B, B }, + [0x18] = { M, M, B }, + [0x19] = { M, M, B }, + [0x1A] = { u, u, u }, + [0x1B] = { u, u, u }, + [0x1C] = { M, F, B }, + [0x1D] = { M, F, B }, + [0x1E] = { u, u, u }, + [0x1F] = { u, u, u }, +}; + +/* Insert a long branch code */ +static void __kprobes set_brl_inst(void *from, void *to) +{ + s64 rel = ((s64) to - (s64) from) >> 4; + bundle_t *brl; + brl = (bundle_t *) ((u64) from & ~0xf); + brl->quad0.template = 0x05; /* [MLX](stop) */ + brl->quad0.slot0 = NOP_M_INST; /* nop.m 0x0 */ + brl->quad0.slot1_p0 = ((rel >> 20) & 0x7fffffffff) << 2; + brl->quad1.slot1_p1 = (((rel >> 20) & 0x7fffffffff) << 2) >> (64 - 46); + /* brl.cond.sptk.many.clr rel<<4 (qp=0) */ + brl->quad1.slot2 = BRL_INST(rel >> 59, rel & 0xfffff); +} + +/* + * In this function we check to see if the instruction + * is IP relative instruction and update the kprobe + * inst flag accordingly + */ +static void __kprobes update_kprobe_inst_flag(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p) +{ + p->ainsn.inst_flag = 0; + p->ainsn.target_br_reg = 0; + p->ainsn.slot = slot; + + /* Check for Break instruction + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 32:35 X3 to be zero + */ + if ((!major_opcode) && (!((kprobe_inst >> 27) & 0x1FF)) ) { + /* is a break instruction */ + p->ainsn.inst_flag |= INST_FLAG_BREAK_INST; + return; + } + + if (bundle_encoding[template][slot] == B) { + switch (major_opcode) { + case INDIRECT_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + case IP_RELATIVE_PREDICT_OPCODE: + case IP_RELATIVE_BRANCH_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + break; + case IP_RELATIVE_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_RELATIVE_IP_ADDR; + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } else if (bundle_encoding[template][slot] == X) { + switch (major_opcode) { + case LONG_CALL_OPCODE: + p->ainsn.inst_flag |= INST_FLAG_FIX_BRANCH_REG; + p->ainsn.target_br_reg = ((kprobe_inst >> 6) & 0x7); + break; + } + } + return; +} + +/* + * In this function we check to see if the instruction + * (qp) cmpx.crel.ctype p1,p2=r2,r3 + * on which we are inserting kprobe is cmp instruction + * with ctype as unc. + */ +static uint __kprobes is_cmp_ctype_unc_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst) +{ + cmp_inst_t cmp_inst; + uint ctype_unc = 0; + + if (!((bundle_encoding[template][slot] == I) || + (bundle_encoding[template][slot] == M))) + goto out; + + if (!((major_opcode == 0xC) || (major_opcode == 0xD) || + (major_opcode == 0xE))) + goto out; + + cmp_inst.l = kprobe_inst; + if ((cmp_inst.f.x2 == 0) || (cmp_inst.f.x2 == 1)) { + /* Integer compare - Register Register (A6 type)*/ + if ((cmp_inst.f.tb == 0) && (cmp_inst.f.ta == 0) + &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } else if ((cmp_inst.f.x2 == 2)||(cmp_inst.f.x2 == 3)) { + /* Integer compare - Immediate Register (A8 type)*/ + if ((cmp_inst.f.ta == 0) &&(cmp_inst.f.c == 1)) + ctype_unc = 1; + } +out: + return ctype_unc; +} + +/* + * In this function we check to see if the instruction + * on which we are inserting kprobe is supported. + * Returns qp value if supported + * Returns -EINVAL if unsupported + */ +static int __kprobes unsupported_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + unsigned long addr) +{ + int qp; + + qp = kprobe_inst & 0x3f; + if (is_cmp_ctype_unc_inst(template, slot, major_opcode, kprobe_inst)) { + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on cmp unc " + "instruction on slot 1 at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + else if (bundle_encoding[template][slot] == I) { + if (major_opcode == 0) { + /* + * Check for Integer speculation instruction + * - Bit 33-35 to be equal to 0x1 + */ + if (((kprobe_inst >> 33) & 0x7) == 1) { + printk(KERN_WARNING + "Kprobes on speculation inst at <0x%lx> not supported\n", + addr); + return -EINVAL; + } + /* + * IP relative mov instruction + * - Bit 27-35 to be equal to 0x30 + */ + if (((kprobe_inst >> 27) & 0x1FF) == 0x30) { + printk(KERN_WARNING + "Kprobes on \"mov r1=ip\" at <0x%lx> not supported\n", + addr); + return -EINVAL; + + } + } + else if ((major_opcode == 5) && !(kprobe_inst & (0xFUl << 33)) && + (kprobe_inst & (0x1UL << 12))) { + /* test bit instructions, tbit,tnat,tf + * bit 33-36 to be equal to 0 + * bit 12 to be equal to 1 + */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on test bit " + "instruction on slot at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + } + qp = 0; + } + } + else if (bundle_encoding[template][slot] == B) { + if (major_opcode == 7) { + /* IP-Relative Predict major code is 7 */ + printk(KERN_WARNING "Kprobes on IP-Relative" + "Predict is not supported\n"); + return -EINVAL; + } + else if (major_opcode == 2) { + /* Indirect Predict, major code is 2 + * bit 27-32 to be equal to 10 or 11 + */ + int x6=(kprobe_inst >> 27) & 0x3F; + if ((x6 == 0x10) || (x6 == 0x11)) { + printk(KERN_WARNING "Kprobes on " + "Indirect Predict is not supported\n"); + return -EINVAL; + } + } + } + /* kernel does not use float instruction, here for safety kprobe + * will judge whether it is fcmp/flass/float approximation instruction + */ + else if (unlikely(bundle_encoding[template][slot] == F)) { + if ((major_opcode == 4 || major_opcode == 5) && + (kprobe_inst & (0x1 << 12))) { + /* fcmp/fclass unc instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on fcmp/fclass " + "instruction on slot at <0x%lx> " + "is not supported\n", addr); + return -EINVAL; + + } + qp = 0; + } + if ((major_opcode == 0 || major_opcode == 1) && + (kprobe_inst & (0x1UL << 33))) { + /* float Approximation instruction */ + if (slot == 1 && qp) { + printk(KERN_WARNING "Kprobes on float Approx " + "instr at <0x%lx> is not supported\n", + addr); + return -EINVAL; + } + qp = 0; + } + } + return qp; +} + +/* + * In this function we override the bundle with + * the break instruction at the given slot. + */ +static void __kprobes prepare_break_inst(uint template, uint slot, + uint major_opcode, + unsigned long kprobe_inst, + struct kprobe *p, + int qp) +{ + unsigned long break_inst = BREAK_INST; + bundle_t *bundle = &p->opcode.bundle; + + /* + * Copy the original kprobe_inst qualifying predicate(qp) + * to the break instruction + */ + break_inst |= qp; + + switch (slot) { + case 0: + bundle->quad0.slot0 = break_inst; + break; + case 1: + bundle->quad0.slot1_p0 = break_inst; + bundle->quad1.slot1_p1 = break_inst >> (64-46); + break; + case 2: + bundle->quad1.slot2 = break_inst; + break; + } + + /* + * Update the instruction flag, so that we can + * emulate the instruction properly after we + * single step on original instruction + */ + update_kprobe_inst_flag(template, slot, major_opcode, kprobe_inst, p); +} + +static void __kprobes get_kprobe_inst(bundle_t *bundle, uint slot, + unsigned long *kprobe_inst, uint *major_opcode) +{ + unsigned long kprobe_inst_p0, kprobe_inst_p1; + unsigned int template; + + template = bundle->quad0.template; + + switch (slot) { + case 0: + *major_opcode = (bundle->quad0.slot0 >> SLOT0_OPCODE_SHIFT); + *kprobe_inst = bundle->quad0.slot0; + break; + case 1: + *major_opcode = (bundle->quad1.slot1_p1 >> SLOT1_p1_OPCODE_SHIFT); + kprobe_inst_p0 = bundle->quad0.slot1_p0; + kprobe_inst_p1 = bundle->quad1.slot1_p1; + *kprobe_inst = kprobe_inst_p0 | (kprobe_inst_p1 << (64-46)); + break; + case 2: + *major_opcode = (bundle->quad1.slot2 >> SLOT2_OPCODE_SHIFT); + *kprobe_inst = bundle->quad1.slot2; + break; + } +} + +/* Returns non-zero if the addr is in the Interrupt Vector Table */ +static int __kprobes in_ivt_functions(unsigned long addr) +{ + return (addr >= (unsigned long)__start_ivt_text + && addr < (unsigned long)__end_ivt_text); +} + +static int __kprobes valid_kprobe_addr(int template, int slot, + unsigned long addr) +{ + if ((slot > 2) || ((bundle_encoding[template][1] == L) && slot > 1)) { + printk(KERN_WARNING "Attempting to insert unaligned kprobe " + "at 0x%lx\n", addr); + return -EINVAL; + } + + if (in_ivt_functions(addr)) { + printk(KERN_WARNING "Kprobes can't be inserted inside " + "IVT functions at 0x%lx\n", addr); + return -EINVAL; + } + + return 0; +} + +static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + unsigned int i; + i = atomic_add_return(1, &kcb->prev_kprobe_index); + kcb->prev_kprobe[i-1].kp = kprobe_running(); + kcb->prev_kprobe[i-1].status = kcb->kprobe_status; +} + +static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) +{ + unsigned int i; + i = atomic_read(&kcb->prev_kprobe_index); + __this_cpu_write(current_kprobe, kcb->prev_kprobe[i-1].kp); + kcb->kprobe_status = kcb->prev_kprobe[i-1].status; + atomic_sub(1, &kcb->prev_kprobe_index); +} + +static void __kprobes set_current_kprobe(struct kprobe *p, + struct kprobe_ctlblk *kcb) +{ + __this_cpu_write(current_kprobe, p); +} + +void __kretprobe_trampoline(void) +{ +} + +int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) +{ + regs->cr_iip = __kretprobe_trampoline_handler(regs, NULL); + /* + * By returning a non-zero value, we are telling + * kprobe_handler() that we don't want the post_handler + * to run (and have re-enabled preemption) + */ + return 1; +} + +void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, + struct pt_regs *regs) +{ + ri->ret_addr = (kprobe_opcode_t *)regs->b0; + ri->fp = NULL; + + /* Replace the return addr with trampoline addr */ + regs->b0 = (unsigned long)dereference_function_descriptor(__kretprobe_trampoline); +} + +/* Check the instruction in the slot is break */ +static int __kprobes __is_ia64_break_inst(bundle_t *bundle, uint slot) +{ + unsigned int major_opcode; + unsigned int template = bundle->quad0.template; + unsigned long kprobe_inst; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get Kprobe probe instruction at given slot*/ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + /* For break instruction, + * Bits 37:40 Major opcode to be zero + * Bits 27:32 X6 to be zero + * Bits 32:35 X3 to be zero + */ + if (major_opcode || ((kprobe_inst >> 27) & 0x1FF)) { + /* Not a break instruction */ + return 0; + } + + /* Is a break instruction */ + return 1; +} + +/* + * In this function, we check whether the target bundle modifies IP or + * it triggers an exception. If so, it cannot be boostable. + */ +static int __kprobes can_boost(bundle_t *bundle, uint slot, + unsigned long bundle_addr) +{ + unsigned int template = bundle->quad0.template; + + do { + if (search_exception_tables(bundle_addr + slot) || + __is_ia64_break_inst(bundle, slot)) + return 0; /* exception may occur in this bundle*/ + } while ((++slot) < 3); + template &= 0x1e; + if (template >= 0x10 /* including B unit */ || + template == 0x04 /* including X unit */ || + template == 0x06) /* undefined */ + return 0; + + return 1; +} + +/* Prepare long jump bundle and disables other boosters if need */ +static void __kprobes prepare_booster(struct kprobe *p) +{ + unsigned long addr = (unsigned long)p->addr & ~0xFULL; + unsigned int slot = (unsigned long)p->addr & 0xf; + struct kprobe *other_kp; + + if (can_boost(&p->ainsn.insn[0].bundle, slot, addr)) { + set_brl_inst(&p->ainsn.insn[1].bundle, (bundle_t *)addr + 1); + p->ainsn.inst_flag |= INST_FLAG_BOOSTABLE; + } + + /* disables boosters in previous slots */ + for (; addr < (unsigned long)p->addr; addr++) { + other_kp = get_kprobe((void *)addr); + if (other_kp) + other_kp->ainsn.inst_flag &= ~INST_FLAG_BOOSTABLE; + } +} + +int __kprobes arch_prepare_kprobe(struct kprobe *p) +{ + unsigned long addr = (unsigned long) p->addr; + unsigned long *kprobe_addr = (unsigned long *)(addr & ~0xFULL); + unsigned long kprobe_inst=0; + unsigned int slot = addr & 0xf, template, major_opcode = 0; + bundle_t *bundle; + int qp; + + bundle = &((kprobe_opcode_t *)kprobe_addr)->bundle; + template = bundle->quad0.template; + + if(valid_kprobe_addr(template, slot, addr)) + return -EINVAL; + + /* Move to slot 2, if bundle is MLX type and kprobe slot is 1 */ + if (slot == 1 && bundle_encoding[template][1] == L) + slot++; + + /* Get kprobe_inst and major_opcode from the bundle */ + get_kprobe_inst(bundle, slot, &kprobe_inst, &major_opcode); + + qp = unsupported_inst(template, slot, major_opcode, kprobe_inst, addr); + if (qp < 0) + return -EINVAL; + + p->ainsn.insn = get_insn_slot(); + if (!p->ainsn.insn) + return -ENOMEM; + memcpy(&p->opcode, kprobe_addr, sizeof(kprobe_opcode_t)); + memcpy(p->ainsn.insn, kprobe_addr, sizeof(kprobe_opcode_t)); + + prepare_break_inst(template, slot, major_opcode, kprobe_inst, p, qp); + + prepare_booster(p); + + return 0; +} + +void __kprobes arch_arm_kprobe(struct kprobe *p) +{ + unsigned long arm_addr; + bundle_t *src, *dest; + + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; + src = &p->opcode.bundle; + + flush_icache_range((unsigned long)p->ainsn.insn, + (unsigned long)p->ainsn.insn + + sizeof(kprobe_opcode_t) * MAX_INSN_SIZE); + + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_disarm_kprobe(struct kprobe *p) +{ + unsigned long arm_addr; + bundle_t *src, *dest; + + arm_addr = ((unsigned long)p->addr) & ~0xFUL; + dest = &((kprobe_opcode_t *)arm_addr)->bundle; + /* p->ainsn.insn contains the original unaltered kprobe_opcode_t */ + src = &p->ainsn.insn->bundle; + switch (p->ainsn.slot) { + case 0: + dest->quad0.slot0 = src->quad0.slot0; + break; + case 1: + dest->quad1.slot1_p1 = src->quad1.slot1_p1; + break; + case 2: + dest->quad1.slot2 = src->quad1.slot2; + break; + } + flush_icache_range(arm_addr, arm_addr + sizeof(kprobe_opcode_t)); +} + +void __kprobes arch_remove_kprobe(struct kprobe *p) +{ + if (p->ainsn.insn) { + free_insn_slot(p->ainsn.insn, + p->ainsn.inst_flag & INST_FLAG_BOOSTABLE); + p->ainsn.insn = NULL; + } +} +/* + * We are resuming execution after a single step fault, so the pt_regs + * structure reflects the register state after we executed the instruction + * located in the kprobe (p->ainsn.insn->bundle). We still need to adjust + * the ip to point back to the original stack address. To set the IP address + * to original stack address, handle the case where we need to fixup the + * relative IP address and/or fixup branch register. + */ +static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) (&p->ainsn.insn->bundle); + unsigned long resume_addr = (unsigned long)p->addr & ~0xFULL; + unsigned long template; + int slot = ((unsigned long)p->addr & 0xf); + + template = p->ainsn.insn->bundle.quad0.template; + + if (slot == 1 && bundle_encoding[template][1] == L) + slot = 2; + + if (p->ainsn.inst_flag & ~INST_FLAG_BOOSTABLE) { + + if (p->ainsn.inst_flag & INST_FLAG_FIX_RELATIVE_IP_ADDR) { + /* Fix relative IP address */ + regs->cr_iip = (regs->cr_iip - bundle_addr) + + resume_addr; + } + + if (p->ainsn.inst_flag & INST_FLAG_FIX_BRANCH_REG) { + /* + * Fix target branch register, software convention is + * to use either b0 or b6 or b7, so just checking + * only those registers + */ + switch (p->ainsn.target_br_reg) { + case 0: + if ((regs->b0 == bundle_addr) || + (regs->b0 == bundle_addr + 0x10)) { + regs->b0 = (regs->b0 - bundle_addr) + + resume_addr; + } + break; + case 6: + if ((regs->b6 == bundle_addr) || + (regs->b6 == bundle_addr + 0x10)) { + regs->b6 = (regs->b6 - bundle_addr) + + resume_addr; + } + break; + case 7: + if ((regs->b7 == bundle_addr) || + (regs->b7 == bundle_addr + 0x10)) { + regs->b7 = (regs->b7 - bundle_addr) + + resume_addr; + } + break; + } /* end switch */ + } + goto turn_ss_off; + } + + if (slot == 2) { + if (regs->cr_iip == bundle_addr + 0x10) { + regs->cr_iip = resume_addr + 0x10; + } + } else { + if (regs->cr_iip == bundle_addr) { + regs->cr_iip = resume_addr; + } + } + +turn_ss_off: + /* Turn off Single Step bit */ + ia64_psr(regs)->ss = 0; +} + +static void __kprobes prepare_ss(struct kprobe *p, struct pt_regs *regs) +{ + unsigned long bundle_addr = (unsigned long) &p->ainsn.insn->bundle; + unsigned long slot = (unsigned long)p->addr & 0xf; + + /* single step inline if break instruction */ + if (p->ainsn.inst_flag == INST_FLAG_BREAK_INST) + regs->cr_iip = (unsigned long)p->addr & ~0xFULL; + else + regs->cr_iip = bundle_addr & ~0xFULL; + + if (slot > 2) + slot = 0; + + ia64_psr(regs)->ri = slot; + + /* turn on single stepping */ + ia64_psr(regs)->ss = 1; +} + +static int __kprobes is_ia64_break_inst(struct pt_regs *regs) +{ + unsigned int slot = ia64_psr(regs)->ri; + unsigned long *kprobe_addr = (unsigned long *)regs->cr_iip; + bundle_t bundle; + + memcpy(&bundle, kprobe_addr, sizeof(bundle_t)); + + return __is_ia64_break_inst(&bundle, slot); +} + +static int __kprobes pre_kprobes_handler(struct die_args *args) +{ + struct kprobe *p; + int ret = 0; + struct pt_regs *regs = args->regs; + kprobe_opcode_t *addr = (kprobe_opcode_t *)instruction_pointer(regs); + struct kprobe_ctlblk *kcb; + + /* + * We don't want to be preempted for the entire + * duration of kprobe processing + */ + preempt_disable(); + kcb = get_kprobe_ctlblk(); + + /* Handle recursion cases */ + if (kprobe_running()) { + p = get_kprobe(addr); + if (p) { + if ((kcb->kprobe_status == KPROBE_HIT_SS) && + (p->ainsn.inst_flag == INST_FLAG_BREAK_INST)) { + ia64_psr(regs)->ss = 0; + goto no_kprobe; + } + /* We have reentered the pre_kprobe_handler(), since + * another probe was hit while within the handler. + * We here save the original kprobes variables and + * just single step on the instruction of the new probe + * without calling any user handlers. + */ + save_previous_kprobe(kcb); + set_current_kprobe(p, kcb); + kprobes_inc_nmissed_count(p); + prepare_ss(p, regs); + kcb->kprobe_status = KPROBE_REENTER; + return 1; + } else if (!is_ia64_break_inst(regs)) { + /* The breakpoint instruction was removed by + * another cpu right after we hit, no further + * handling of this interrupt is appropriate + */ + ret = 1; + goto no_kprobe; + } else { + /* Not our break */ + goto no_kprobe; + } + } + + p = get_kprobe(addr); + if (!p) { + if (!is_ia64_break_inst(regs)) { + /* + * The breakpoint instruction was removed right + * after we hit it. Another cpu has removed + * either a probepoint or a debugger breakpoint + * at this address. In either case, no further + * handling of this interrupt is appropriate. + */ + ret = 1; + + } + + /* Not one of our break, let kernel handle it */ + goto no_kprobe; + } + + set_current_kprobe(p, kcb); + kcb->kprobe_status = KPROBE_HIT_ACTIVE; + + if (p->pre_handler && p->pre_handler(p, regs)) { + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } + +#if !defined(CONFIG_PREEMPTION) + if (p->ainsn.inst_flag == INST_FLAG_BOOSTABLE && !p->post_handler) { + /* Boost up -- we can execute copied instructions directly */ + ia64_psr(regs)->ri = p->ainsn.slot; + regs->cr_iip = (unsigned long)&p->ainsn.insn->bundle & ~0xFULL; + /* turn single stepping off */ + ia64_psr(regs)->ss = 0; + + reset_current_kprobe(); + preempt_enable_no_resched(); + return 1; + } +#endif + prepare_ss(p, regs); + kcb->kprobe_status = KPROBE_HIT_SS; + return 1; + +no_kprobe: + preempt_enable_no_resched(); + return ret; +} + +static int __kprobes post_kprobes_handler(struct pt_regs *regs) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + if (!cur) + return 0; + + if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) { + kcb->kprobe_status = KPROBE_HIT_SSDONE; + cur->post_handler(cur, regs, 0); + } + + resume_execution(cur, regs); + + /*Restore back the original saved kprobes variables and continue. */ + if (kcb->kprobe_status == KPROBE_REENTER) { + restore_previous_kprobe(kcb); + goto out; + } + reset_current_kprobe(); + +out: + preempt_enable_no_resched(); + return 1; +} + +int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr) +{ + struct kprobe *cur = kprobe_running(); + struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + + + switch(kcb->kprobe_status) { + case KPROBE_HIT_SS: + case KPROBE_REENTER: + /* + * We are here because the instruction being single + * stepped caused a page fault. We reset the current + * kprobe and the instruction pointer points back to + * the probe address and allow the page fault handler + * to continue as a normal page fault. + */ + regs->cr_iip = ((unsigned long)cur->addr) & ~0xFULL; + ia64_psr(regs)->ri = ((unsigned long)cur->addr) & 0xf; + if (kcb->kprobe_status == KPROBE_REENTER) + restore_previous_kprobe(kcb); + else + reset_current_kprobe(); + preempt_enable_no_resched(); + break; + case KPROBE_HIT_ACTIVE: + case KPROBE_HIT_SSDONE: + /* + * In case the user-specified fault handler returned + * zero, try to fix up. + */ + if (ia64_done_with_exception(regs)) + return 1; + + /* + * Let ia64_do_page_fault() fix it. + */ + break; + default: + break; + } + + return 0; +} + +int __kprobes kprobe_exceptions_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + struct die_args *args = (struct die_args *)data; + int ret = NOTIFY_DONE; + + if (args->regs && user_mode(args->regs)) + return ret; + + switch(val) { + case DIE_BREAK: + /* err is break number from ia64_bad_break() */ + if ((args->err >> 12) == (__IA64_BREAK_KPROBE >> 12) + || args->err == 0) + if (pre_kprobes_handler(args)) + ret = NOTIFY_STOP; + break; + case DIE_FAULT: + /* err is vector number from ia64_fault() */ + if (args->err == 36) + if (post_kprobes_handler(args->regs)) + ret = NOTIFY_STOP; + break; + default: + break; + } + return ret; +} + +static struct kprobe trampoline_p = { + .pre_handler = trampoline_probe_handler +}; + +int __init arch_init_kprobes(void) +{ + trampoline_p.addr = + dereference_function_descriptor(__kretprobe_trampoline); + return register_kprobe(&trampoline_p); +} + +int __kprobes arch_trampoline_kprobe(struct kprobe *p) +{ + if (p->addr == + dereference_function_descriptor(__kretprobe_trampoline)) + return 1; + + return 0; +} diff --git a/arch/ia64/kernel/machine_kexec.c b/arch/ia64/kernel/machine_kexec.c new file mode 100644 index 000000000000..4db9ca144fa5 --- /dev/null +++ b/arch/ia64/kernel/machine_kexec.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * arch/ia64/kernel/machine_kexec.c + * + * Handle transition of Linux booting another kernel + * Copyright (C) 2005 Hewlett-Packard Development Comapny, L.P. + * Copyright (C) 2005 Khalid Aziz + * Copyright (C) 2006 Intel Corp, Zou Nan hai + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +typedef void (*relocate_new_kernel_t)( + unsigned long indirection_page, + unsigned long start_address, + struct ia64_boot_param *boot_param, + unsigned long pal_addr) __noreturn; + +struct kimage *ia64_kimage; + +struct resource efi_memmap_res = { + .name = "EFI Memory Map", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + +struct resource boot_param_res = { + .name = "Boot parameter", + .start = 0, + .end = 0, + .flags = IORESOURCE_BUSY | IORESOURCE_MEM +}; + + +/* + * Do what every setup is needed on image and the + * reboot code buffer to allow us to avoid allocations + * later. + */ +int machine_kexec_prepare(struct kimage *image) +{ + void *control_code_buffer; + const unsigned long *func; + + func = (unsigned long *)&relocate_new_kernel; + /* Pre-load control code buffer to minimize work in kexec path */ + control_code_buffer = page_address(image->control_code_page); + memcpy((void *)control_code_buffer, (const void *)func[0], + relocate_new_kernel_size); + flush_icache_range((unsigned long)control_code_buffer, + (unsigned long)control_code_buffer + relocate_new_kernel_size); + ia64_kimage = image; + + return 0; +} + +void machine_kexec_cleanup(struct kimage *image) +{ +} + +/* + * Do not allocate memory (or fail in any way) in machine_kexec(). + * We are past the point of no return, committed to rebooting now. + */ +static void ia64_machine_kexec(struct unw_frame_info *info, void *arg) +{ + struct kimage *image = arg; + relocate_new_kernel_t rnk; + void *pal_addr = efi_get_pal_addr(); + unsigned long code_addr; + int ii; + u64 fp, gp; + ia64_fptr_t *init_handler = (ia64_fptr_t *)ia64_os_init_on_kdump; + + BUG_ON(!image); + code_addr = (unsigned long)page_address(image->control_code_page); + if (image->type == KEXEC_TYPE_CRASH) { + crash_save_this_cpu(); + current->thread.ksp = (__u64)info->sw - 16; + + /* Register noop init handler */ + fp = ia64_tpa(init_handler->fp); + gp = ia64_tpa(ia64_getreg(_IA64_REG_GP)); + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, fp, gp, 0, fp, gp, 0); + } else { + /* Unregister init handlers of current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, 0, 0, 0, 0, 0, 0); + } + + /* Unregister mca handler - No more recovery on current kernel */ + ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, 0, 0, 0, 0, 0, 0); + + /* Interrupts aren't acceptable while we reboot */ + local_irq_disable(); + + /* Mask CMC and Performance Monitor interrupts */ + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* Mask ITV and Local Redirect Registers */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + + /* terminate possible nested in-service interrupts */ + for (ii = 0; ii < 16; ii++) + ia64_eoi(); + + /* unmask TPR and clear any pending interrupts */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + ia64_srlz_d(); + while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) + ia64_eoi(); + rnk = (relocate_new_kernel_t)&code_addr; + (*rnk)(image->head, image->start, ia64_boot_param, + GRANULEROUNDDOWN((unsigned long) pal_addr)); + BUG(); +} + +void machine_kexec(struct kimage *image) +{ + BUG_ON(!image); + unw_init_running(ia64_machine_kexec, image); + for(;;); +} + +void arch_crash_save_vmcoreinfo(void) +{ +#if defined(CONFIG_SPARSEMEM) + VMCOREINFO_SYMBOL(pgdat_list); + VMCOREINFO_LENGTH(pgdat_list, MAX_NUMNODES); +#endif +#ifdef CONFIG_NUMA + VMCOREINFO_SYMBOL(node_memblk); + VMCOREINFO_LENGTH(node_memblk, NR_NODE_MEMBLKS); + VMCOREINFO_STRUCT_SIZE(node_memblk_s); + VMCOREINFO_OFFSET(node_memblk_s, start_paddr); + VMCOREINFO_OFFSET(node_memblk_s, size); +#endif +#if CONFIG_PGTABLE_LEVELS == 3 + VMCOREINFO_CONFIG(PGTABLE_3); +#elif CONFIG_PGTABLE_LEVELS == 4 + VMCOREINFO_CONFIG(PGTABLE_4); +#endif +} + diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c new file mode 100644 index 000000000000..2671688d349a --- /dev/null +++ b/arch/ia64/kernel/mca.c @@ -0,0 +1,2111 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * File: mca.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * Copyright (C) 2002 Dell Inc. + * Copyright (C) Matt Domsch + * + * Copyright (C) 2002 Intel + * Copyright (C) Jenna Hall + * + * Copyright (C) 2001 Intel + * Copyright (C) Fred Lewis + * + * Copyright (C) 2000 Intel + * Copyright (C) Chuck Fleckenstein + * + * Copyright (C) 1999, 2004-2008 Silicon Graphics, Inc. + * Copyright (C) Vijay Chander + * + * Copyright (C) 2006 FUJITSU LIMITED + * Copyright (C) Hidetoshi Seto + * + * 2000-03-29 Chuck Fleckenstein + * Fixed PAL/SAL update issues, began MCA bug fixes, logging issues, + * added min save state dump, added INIT handler. + * + * 2001-01-03 Fred Lewis + * Added setup of CMCI and CPEI IRQs, logging of corrected platform + * errors, completed code for logging of corrected & uncorrected + * machine check errors, and updated for conformance with Nov. 2000 + * revision of the SAL 3.0 spec. + * + * 2002-01-04 Jenna Hall + * Aligned MCA stack to 16 bytes, added platform vs. CPU error flag, + * set SAL default return values, changed error record structure to + * linked list, added init call to sal_get_state_info_size(). + * + * 2002-03-25 Matt Domsch + * GUID cleanups. + * + * 2003-04-15 David Mosberger-Tang + * Added INIT backtrace support. + * + * 2003-12-08 Keith Owens + * smp_call_function() must not be called from interrupt context + * (can deadlock on tasklist_lock). + * Use keventd to call smp_call_function(). + * + * 2004-02-01 Keith Owens + * Avoid deadlock when using printk() for MCA and INIT records. + * Delete all record printing code, moved to salinfo_decode in user + * space. Mark variables and functions static where possible. + * Delete dead variables and functions. Reorder to remove the need + * for forward declarations and to consolidate related code. + * + * 2005-08-12 Keith Owens + * Convert MCA/INIT handlers to use per event stacks and SAL/OS + * state. + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks. + * + * 2006-09-15 Hidetoshi Seto + * Add printing support for MCA/INIT. + * + * 2007-04-27 Russ Anderson + * Support multiple cpus going through OS_MCA in the same event. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mca_drv.h" +#include "entry.h" +#include "irq.h" + +#if defined(IA64_MCA_DEBUG_INFO) +# define IA64_MCA_DEBUG(fmt...) printk(fmt) +#else +# define IA64_MCA_DEBUG(fmt...) do {} while (0) +#endif + +#define NOTIFY_INIT(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "INIT", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +#define NOTIFY_MCA(event, regs, arg, spin) \ +do { \ + if ((notify_die((event), "MCA", (regs), (arg), 0, 0) \ + == NOTIFY_STOP) && ((spin) == 1)) \ + ia64_mca_spin(__func__); \ +} while (0) + +/* Used by mca_asm.S */ +DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ +DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ +DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ +DEFINE_PER_CPU(u64, ia64_mca_pal_base); /* vaddr PAL code granule */ +DEFINE_PER_CPU(u64, ia64_mca_tr_reload); /* Flag for TR reload */ + +unsigned long __per_cpu_mca[NR_CPUS]; + +/* In mca_asm.S */ +extern void ia64_os_init_dispatch_monarch (void); +extern void ia64_os_init_dispatch_slave (void); + +static int monarch_cpu = -1; + +static ia64_mc_info_t ia64_mc_info; + +#define MAX_CPE_POLL_INTERVAL (15*60*HZ) /* 15 minutes */ +#define MIN_CPE_POLL_INTERVAL (2*60*HZ) /* 2 minutes */ +#define CMC_POLL_INTERVAL (1*60*HZ) /* 1 minute */ +#define CPE_HISTORY_LENGTH 5 +#define CMC_HISTORY_LENGTH 5 + +static struct timer_list cpe_poll_timer; +static struct timer_list cmc_poll_timer; +/* + * This variable tells whether we are currently in polling mode. + * Start with this in the wrong state so we won't play w/ timers + * before the system is ready. + */ +static int cmc_polling_enabled = 1; + +/* + * Clearing this variable prevents CPE polling from getting activated + * in mca_late_init. Use it if your system doesn't provide a CPEI, + * but encounters problems retrieving CPE logs. This should only be + * necessary for debugging. + */ +static int cpe_poll_enabled = 1; + +extern void salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe); + +static int mca_init __initdata; + +/* + * limited & delayed printing support for MCA/INIT handler + */ + +#define mprintk(fmt...) ia64_mca_printk(fmt) + +#define MLOGBUF_SIZE (512+256*NR_CPUS) +#define MLOGBUF_MSGMAX 256 +static char mlogbuf[MLOGBUF_SIZE]; +static DEFINE_SPINLOCK(mlogbuf_wlock); /* mca context only */ +static DEFINE_SPINLOCK(mlogbuf_rlock); /* normal context only */ +static unsigned long mlogbuf_start; +static unsigned long mlogbuf_end; +static unsigned int mlogbuf_finished = 0; +static unsigned long mlogbuf_timestamp = 0; + +static int loglevel_save = -1; +#define BREAK_LOGLEVEL(__console_loglevel) \ + oops_in_progress = 1; \ + if (loglevel_save < 0) \ + loglevel_save = __console_loglevel; \ + __console_loglevel = 15; + +#define RESTORE_LOGLEVEL(__console_loglevel) \ + if (loglevel_save >= 0) { \ + __console_loglevel = loglevel_save; \ + loglevel_save = -1; \ + } \ + mlogbuf_finished = 0; \ + oops_in_progress = 0; + +/* + * Push messages into buffer, print them later if not urgent. + */ +void ia64_mca_printk(const char *fmt, ...) +{ + va_list args; + int printed_len; + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + + va_start(args, fmt); + printed_len = vscnprintf(temp_buf, sizeof(temp_buf), fmt, args); + va_end(args); + + /* Copy the output into mlogbuf */ + if (oops_in_progress) { + /* mlogbuf was abandoned, use printk directly instead. */ + printk("%s", temp_buf); + } else { + spin_lock(&mlogbuf_wlock); + for (p = temp_buf; *p; p++) { + unsigned long next = (mlogbuf_end + 1) % MLOGBUF_SIZE; + if (next != mlogbuf_start) { + mlogbuf[mlogbuf_end] = *p; + mlogbuf_end = next; + } else { + /* buffer full */ + break; + } + } + mlogbuf[mlogbuf_end] = '\0'; + spin_unlock(&mlogbuf_wlock); + } +} +EXPORT_SYMBOL(ia64_mca_printk); + +/* + * Print buffered messages. + * NOTE: call this after returning normal context. (ex. from salinfod) + */ +void ia64_mlogbuf_dump(void) +{ + char temp_buf[MLOGBUF_MSGMAX]; + char *p; + unsigned long index; + unsigned long flags; + unsigned int printed_len; + + /* Get output from mlogbuf */ + while (mlogbuf_start != mlogbuf_end) { + temp_buf[0] = '\0'; + p = temp_buf; + printed_len = 0; + + spin_lock_irqsave(&mlogbuf_rlock, flags); + + index = mlogbuf_start; + while (index != mlogbuf_end) { + *p = mlogbuf[index]; + index = (index + 1) % MLOGBUF_SIZE; + if (!*p) + break; + p++; + if (++printed_len >= MLOGBUF_MSGMAX - 1) + break; + } + *p = '\0'; + if (temp_buf[0]) + printk("%s", temp_buf); + mlogbuf_start = index; + + mlogbuf_timestamp = 0; + spin_unlock_irqrestore(&mlogbuf_rlock, flags); + } +} +EXPORT_SYMBOL(ia64_mlogbuf_dump); + +/* + * Call this if system is going to down or if immediate flushing messages to + * console is required. (ex. recovery was failed, crash dump is going to be + * invoked, long-wait rendezvous etc.) + * NOTE: this should be called from monarch. + */ +static void ia64_mlogbuf_finish(int wait) +{ + BREAK_LOGLEVEL(console_loglevel); + + ia64_mlogbuf_dump(); + printk(KERN_EMERG "mlogbuf_finish: printing switched to urgent mode, " + "MCA/INIT might be dodgy or fail.\n"); + + if (!wait) + return; + + /* wait for console */ + printk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + + mlogbuf_finished = 1; +} + +/* + * Print buffered messages from INIT context. + */ +static void ia64_mlogbuf_dump_from_init(void) +{ + if (mlogbuf_finished) + return; + + if (mlogbuf_timestamp && + time_before(jiffies, mlogbuf_timestamp + 30 * HZ)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT " + " and the system seems to be messed up.\n"); + ia64_mlogbuf_finish(0); + return; + } + + if (!spin_trylock(&mlogbuf_rlock)) { + printk(KERN_ERR "INIT: mlogbuf_dump is interrupted by INIT. " + "Generated messages other than stack dump will be " + "buffered to mlogbuf and will be printed later.\n"); + printk(KERN_ERR "INIT: If messages would not printed after " + "this INIT, wait 30sec and assert INIT again.\n"); + if (!mlogbuf_timestamp) + mlogbuf_timestamp = jiffies; + return; + } + spin_unlock(&mlogbuf_rlock); + ia64_mlogbuf_dump(); +} + +static inline void +ia64_mca_spin(const char *func) +{ + if (monarch_cpu == smp_processor_id()) + ia64_mlogbuf_finish(0); + mprintk(KERN_EMERG "%s: spinning here, not returning to SAL\n", func); + while (1) + cpu_relax(); +} +/* + * IA64_MCA log support + */ +#define IA64_MAX_LOGS 2 /* Double-buffering for nested MCAs */ +#define IA64_MAX_LOG_TYPES 4 /* MCA, INIT, CMC, CPE */ + +typedef struct ia64_state_log_s +{ + spinlock_t isl_lock; + int isl_index; + unsigned long isl_count; + ia64_err_rec_t *isl_log[IA64_MAX_LOGS]; /* need space to store header + error log */ +} ia64_state_log_t; + +static ia64_state_log_t ia64_state_log[IA64_MAX_LOG_TYPES]; + +#define IA64_LOG_LOCK_INIT(it) spin_lock_init(&ia64_state_log[it].isl_lock) +#define IA64_LOG_LOCK(it) spin_lock_irqsave(&ia64_state_log[it].isl_lock, s) +#define IA64_LOG_UNLOCK(it) spin_unlock_irqrestore(&ia64_state_log[it].isl_lock,s) +#define IA64_LOG_NEXT_INDEX(it) ia64_state_log[it].isl_index +#define IA64_LOG_CURR_INDEX(it) 1 - ia64_state_log[it].isl_index +#define IA64_LOG_INDEX_INC(it) \ + {ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index; \ + ia64_state_log[it].isl_count++;} +#define IA64_LOG_INDEX_DEC(it) \ + ia64_state_log[it].isl_index = 1 - ia64_state_log[it].isl_index +#define IA64_LOG_NEXT_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)])) +#define IA64_LOG_CURR_BUFFER(it) (void *)((ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)])) +#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count + +static inline void ia64_log_allocate(int it, u64 size) +{ + ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)] = + (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); + if (!ia64_state_log[it].isl_log[IA64_LOG_CURR_INDEX(it)]) + panic("%s: Failed to allocate %llu bytes\n", __func__, size); + + ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)] = + (ia64_err_rec_t *)memblock_alloc(size, SMP_CACHE_BYTES); + if (!ia64_state_log[it].isl_log[IA64_LOG_NEXT_INDEX(it)]) + panic("%s: Failed to allocate %llu bytes\n", __func__, size); +} + +/* + * ia64_log_init + * Reset the OS ia64 log buffer + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * Outputs : None + */ +static void __init +ia64_log_init(int sal_info_type) +{ + u64 max_size = 0; + + IA64_LOG_NEXT_INDEX(sal_info_type) = 0; + IA64_LOG_LOCK_INIT(sal_info_type); + + // SAL will tell us the maximum size of any error record of this type + max_size = ia64_sal_get_state_info_size(sal_info_type); + if (!max_size) + /* alloc_bootmem() doesn't like zero-sized allocations! */ + return; + + // set up OS data structures to hold error info + ia64_log_allocate(sal_info_type, max_size); +} + +/* + * ia64_log_get + * + * Get the current MCA log from SAL and copy it into the OS log buffer. + * + * Inputs : info_type (SAL_INFO_TYPE_{MCA,INIT,CMC,CPE}) + * irq_safe whether you can use printk at this point + * Outputs : size (total record length) + * *buffer (ptr to error record) + * + */ +static u64 +ia64_log_get(int sal_info_type, u8 **buffer, int irq_safe) +{ + sal_log_record_header_t *log_buffer; + u64 total_len = 0; + unsigned long s; + + IA64_LOG_LOCK(sal_info_type); + + /* Get the process state information */ + log_buffer = IA64_LOG_NEXT_BUFFER(sal_info_type); + + total_len = ia64_sal_get_state_info(sal_info_type, (u64 *)log_buffer); + + if (total_len) { + IA64_LOG_INDEX_INC(sal_info_type); + IA64_LOG_UNLOCK(sal_info_type); + if (irq_safe) { + IA64_MCA_DEBUG("%s: SAL error record type %d retrieved. Record length = %ld\n", + __func__, sal_info_type, total_len); + } + *buffer = (u8 *) log_buffer; + return total_len; + } else { + IA64_LOG_UNLOCK(sal_info_type); + return 0; + } +} + +/* + * ia64_mca_log_sal_error_record + * + * This function retrieves a specified error record type from SAL + * and wakes up any processes waiting for error records. + * + * Inputs : sal_info_type (Type of error record MCA/CMC/CPE) + * FIXME: remove MCA and irq_safe. + */ +static void +ia64_mca_log_sal_error_record(int sal_info_type) +{ + u8 *buffer; + sal_log_record_header_t *rh; + u64 size; + int irq_safe = sal_info_type != SAL_INFO_TYPE_MCA; +#ifdef IA64_MCA_DEBUG_INFO + static const char * const rec_name[] = { "MCA", "INIT", "CMC", "CPE" }; +#endif + + size = ia64_log_get(sal_info_type, &buffer, irq_safe); + if (!size) + return; + + salinfo_log_wakeup(sal_info_type, buffer, size, irq_safe); + + if (irq_safe) + IA64_MCA_DEBUG("CPU %d: SAL log contains %s error record\n", + smp_processor_id(), + sal_info_type < ARRAY_SIZE(rec_name) ? rec_name[sal_info_type] : "UNKNOWN"); + + /* Clear logs from corrected errors in case there's no user-level logger */ + rh = (sal_log_record_header_t *)buffer; + if (rh->severity == sal_log_severity_corrected) + ia64_sal_clear_state_info(sal_info_type); +} + +/* + * search_mca_table + * See if the MCA surfaced in an instruction range + * that has been tagged as recoverable. + * + * Inputs + * first First address range to check + * last Last address range to check + * ip Instruction pointer, address we are looking for + * + * Return value: + * 1 on Success (in the table)/ 0 on Failure (not in the table) + */ +int +search_mca_table (const struct mca_table_entry *first, + const struct mca_table_entry *last, + unsigned long ip) +{ + const struct mca_table_entry *curr; + u64 curr_start, curr_end; + + curr = first; + while (curr <= last) { + curr_start = (u64) &curr->start_addr + curr->start_addr; + curr_end = (u64) &curr->end_addr + curr->end_addr; + + if ((ip >= curr_start) && (ip <= curr_end)) { + return 1; + } + curr++; + } + return 0; +} + +/* Given an address, look for it in the mca tables. */ +int mca_recover_range(unsigned long addr) +{ + extern struct mca_table_entry __start___mca_table[]; + extern struct mca_table_entry __stop___mca_table[]; + + return search_mca_table(__start___mca_table, __stop___mca_table-1, addr); +} +EXPORT_SYMBOL_GPL(mca_recover_range); + +int cpe_vector = -1; +int ia64_cpe_irq = -1; + +static irqreturn_t +ia64_mca_cpe_int_handler (int cpe_irq, void *arg) +{ + static unsigned long cpe_history[CPE_HISTORY_LENGTH]; + static int index; + static DEFINE_SPINLOCK(cpe_history_lock); + + IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", + __func__, cpe_irq, smp_processor_id()); + + /* SAL spec states this should run w/ interrupts enabled */ + local_irq_enable(); + + spin_lock(&cpe_history_lock); + if (!cpe_poll_enabled && cpe_vector >= 0) { + + int i, count = 1; /* we know 1 happened now */ + unsigned long now = jiffies; + + for (i = 0; i < CPE_HISTORY_LENGTH; i++) { + if (now - cpe_history[i] <= HZ) + count++; + } + + IA64_MCA_DEBUG(KERN_INFO "CPE threshold %d/%d\n", count, CPE_HISTORY_LENGTH); + if (count >= CPE_HISTORY_LENGTH) { + + cpe_poll_enabled = 1; + spin_unlock(&cpe_history_lock); + disable_irq_nosync(local_vector_to_irq(IA64_CPE_VECTOR)); + + /* + * Corrected errors will still be corrected, but + * make sure there's a log somewhere that indicates + * something is generating more than we can handle. + */ + printk(KERN_WARNING "WARNING: Switching to polling CPE handler; error records may be lost\n"); + + mod_timer(&cpe_poll_timer, jiffies + MIN_CPE_POLL_INTERVAL); + + /* lock already released, get out now */ + goto out; + } else { + cpe_history[index++] = now; + if (index == CPE_HISTORY_LENGTH) + index = 0; + } + } + spin_unlock(&cpe_history_lock); +out: + /* Get the CPE error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CPE); + + local_irq_disable(); + + return IRQ_HANDLED; +} + +/* + * ia64_mca_register_cpev + * + * Register the corrected platform error vector with SAL. + * + * Inputs + * cpev Corrected Platform Error Vector number + * + * Outputs + * None + */ +void +ia64_mca_register_cpev (int cpev) +{ + /* Register the CPE interrupt vector with SAL */ + struct ia64_sal_retval isrv; + + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_CPE_INT, SAL_MC_PARAM_MECHANISM_INT, cpev, 0, 0); + if (isrv.status) { + printk(KERN_ERR "Failed to register Corrected Platform " + "Error interrupt vector with SAL (status %ld)\n", isrv.status); + return; + } + + IA64_MCA_DEBUG("%s: corrected platform error " + "vector %#x registered\n", __func__, cpev); +} + +/* + * ia64_mca_cmc_vector_setup + * + * Setup the corrected machine check vector register in the processor. + * (The interrupt is masked on boot. ia64_mca_late_init unmask this.) + * This function is invoked on a per-processor basis. + * + * Inputs + * None + * + * Outputs + * None + */ +void +ia64_mca_cmc_vector_setup (void) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = 0; + cmcv.cmcv_mask = 1; /* Mask/disable interrupt at first */ + cmcv.cmcv_vector = IA64_CMC_VECTOR; + ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); + + IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x registered.\n", + __func__, smp_processor_id(), IA64_CMC_VECTOR); + + IA64_MCA_DEBUG("%s: CPU %d CMCV = %#016lx\n", + __func__, smp_processor_id(), ia64_getreg(_IA64_REG_CR_CMCV)); +} + +/* + * ia64_mca_cmc_vector_disable + * + * Mask the corrected machine check vector register in the processor. + * This function is invoked on a per-processor basis. + * + * Inputs + * dummy(unused) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_disable (void *dummy) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); + + cmcv.cmcv_mask = 1; /* Mask/disable interrupt */ + ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); + + IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x disabled.\n", + __func__, smp_processor_id(), cmcv.cmcv_vector); +} + +/* + * ia64_mca_cmc_vector_enable + * + * Unmask the corrected machine check vector register in the processor. + * This function is invoked on a per-processor basis. + * + * Inputs + * dummy(unused) + * + * Outputs + * None + */ +static void +ia64_mca_cmc_vector_enable (void *dummy) +{ + cmcv_reg_t cmcv; + + cmcv.cmcv_regval = ia64_getreg(_IA64_REG_CR_CMCV); + + cmcv.cmcv_mask = 0; /* Unmask/enable interrupt */ + ia64_setreg(_IA64_REG_CR_CMCV, cmcv.cmcv_regval); + + IA64_MCA_DEBUG("%s: CPU %d corrected machine check vector %#x enabled.\n", + __func__, smp_processor_id(), cmcv.cmcv_vector); +} + +/* + * ia64_mca_cmc_vector_disable_keventd + * + * Called via keventd (smp_call_function() is not safe in interrupt context) to + * disable the cmc interrupt vector. + */ +static void +ia64_mca_cmc_vector_disable_keventd(struct work_struct *unused) +{ + on_each_cpu(ia64_mca_cmc_vector_disable, NULL, 0); +} + +/* + * ia64_mca_cmc_vector_enable_keventd + * + * Called via keventd (smp_call_function() is not safe in interrupt context) to + * enable the cmc interrupt vector. + */ +static void +ia64_mca_cmc_vector_enable_keventd(struct work_struct *unused) +{ + on_each_cpu(ia64_mca_cmc_vector_enable, NULL, 0); +} + +/* + * ia64_mca_wakeup + * + * Send an inter-cpu interrupt to wake-up a particular cpu. + * + * Inputs : cpuid + * Outputs : None + */ +static void +ia64_mca_wakeup(int cpu) +{ + ia64_send_ipi(cpu, IA64_MCA_WAKEUP_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * ia64_mca_wakeup_all + * + * Wakeup all the slave cpus which have rendez'ed previously. + * + * Inputs : None + * Outputs : None + */ +static void +ia64_mca_wakeup_all(void) +{ + int cpu; + + /* Clear the Rendez checkin flag for all cpus */ + for_each_online_cpu(cpu) { + if (ia64_mc_info.imi_rendez_checkin[cpu] == IA64_MCA_RENDEZ_CHECKIN_DONE) + ia64_mca_wakeup(cpu); + } + +} + +/* + * ia64_mca_rendez_interrupt_handler + * + * This is handler used to put slave processors into spinloop + * while the monarch processor does the mca handling and later + * wake each slave up once the monarch is done. The state + * IA64_MCA_RENDEZ_CHECKIN_DONE indicates the cpu is rendez'ed + * in SAL. The state IA64_MCA_RENDEZ_CHECKIN_NOTDONE indicates + * the cpu has come out of OS rendezvous. + * + * Inputs : None + * Outputs : None + */ +static irqreturn_t +ia64_mca_rendez_int_handler(int rendez_irq, void *arg) +{ + unsigned long flags; + int cpu = smp_processor_id(); + struct ia64_mca_notify_die nd = + { .sos = NULL, .monarch_cpu = &monarch_cpu }; + + /* Mask all interrupts */ + local_irq_save(flags); + + NOTIFY_MCA(DIE_MCA_RENDZVOUS_ENTER, get_irq_regs(), (long)&nd, 1); + + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_DONE; + /* Register with the SAL monarch that the slave has + * reached SAL + */ + ia64_sal_mc_rendez(); + + NOTIFY_MCA(DIE_MCA_RENDZVOUS_PROCESS, get_irq_regs(), (long)&nd, 1); + + /* Wait for the monarch cpu to exit. */ + while (monarch_cpu != -1) + cpu_relax(); /* spin until monarch leaves */ + + NOTIFY_MCA(DIE_MCA_RENDZVOUS_LEAVE, get_irq_regs(), (long)&nd, 1); + + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + /* Enable all interrupts */ + local_irq_restore(flags); + return IRQ_HANDLED; +} + +/* + * ia64_mca_wakeup_int_handler + * + * The interrupt handler for processing the inter-cpu interrupt to the + * slave cpu which was spinning in the rendez loop. + * Since this spinning is done by turning off the interrupts and + * polling on the wakeup-interrupt bit in the IRR, there is + * nothing useful to be done in the handler. + * + * Inputs : wakeup_irq (Wakeup-interrupt bit) + * arg (Interrupt handler specific argument) + * Outputs : None + * + */ +static irqreturn_t +ia64_mca_wakeup_int_handler(int wakeup_irq, void *arg) +{ + return IRQ_HANDLED; +} + +/* Function pointer for extra MCA recovery */ +int (*ia64_mca_ucmc_extension) + (void*,struct ia64_sal_os_state*) + = NULL; + +int +ia64_reg_MCA_extension(int (*fn)(void *, struct ia64_sal_os_state *)) +{ + if (ia64_mca_ucmc_extension) + return 1; + + ia64_mca_ucmc_extension = fn; + return 0; +} + +void +ia64_unreg_MCA_extension(void) +{ + if (ia64_mca_ucmc_extension) + ia64_mca_ucmc_extension = NULL; +} + +EXPORT_SYMBOL(ia64_reg_MCA_extension); +EXPORT_SYMBOL(ia64_unreg_MCA_extension); + + +static inline void +copy_reg(const u64 *fr, u64 fnat, unsigned long *tr, unsigned long *tnat) +{ + u64 fslot, tslot, nat; + *tr = *fr; + fslot = ((unsigned long)fr >> 3) & 63; + tslot = ((unsigned long)tr >> 3) & 63; + *tnat &= ~(1UL << tslot); + nat = (fnat >> fslot) & 1; + *tnat |= (nat << tslot); +} + +/* Change the comm field on the MCA/INT task to include the pid that + * was interrupted, it makes for easier debugging. If that pid was 0 + * (swapper or nested MCA/INIT) then use the start of the previous comm + * field suffixed with its cpu. + */ + +static void +ia64_mca_modify_comm(const struct task_struct *previous_current) +{ + char *p, comm[sizeof(current->comm)]; + if (previous_current->pid) + snprintf(comm, sizeof(comm), "%s %d", + current->comm, previous_current->pid); + else { + int l; + if ((p = strchr(previous_current->comm, ' '))) + l = p - previous_current->comm; + else + l = strlen(previous_current->comm); + snprintf(comm, sizeof(comm), "%s %*s %d", + current->comm, l, previous_current->comm, + task_thread_info(previous_current)->cpu); + } + memcpy(current->comm, comm, sizeof(current->comm)); +} + +static void +finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos, + unsigned long *nat) +{ + const struct pal_min_state_area *ms = sos->pal_min_state; + const u64 *bank; + + /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use + * pmsa_{xip,xpsr,xfs} + */ + if (ia64_psr(regs)->ic) { + regs->cr_iip = ms->pmsa_iip; + regs->cr_ipsr = ms->pmsa_ipsr; + regs->cr_ifs = ms->pmsa_ifs; + } else { + regs->cr_iip = ms->pmsa_xip; + regs->cr_ipsr = ms->pmsa_xpsr; + regs->cr_ifs = ms->pmsa_xfs; + + sos->iip = ms->pmsa_iip; + sos->ipsr = ms->pmsa_ipsr; + sos->ifs = ms->pmsa_ifs; + } + regs->pr = ms->pmsa_pr; + regs->b0 = ms->pmsa_br0; + regs->ar_rsc = ms->pmsa_rsc; + copy_reg(&ms->pmsa_gr[1-1], ms->pmsa_nat_bits, ®s->r1, nat); + copy_reg(&ms->pmsa_gr[2-1], ms->pmsa_nat_bits, ®s->r2, nat); + copy_reg(&ms->pmsa_gr[3-1], ms->pmsa_nat_bits, ®s->r3, nat); + copy_reg(&ms->pmsa_gr[8-1], ms->pmsa_nat_bits, ®s->r8, nat); + copy_reg(&ms->pmsa_gr[9-1], ms->pmsa_nat_bits, ®s->r9, nat); + copy_reg(&ms->pmsa_gr[10-1], ms->pmsa_nat_bits, ®s->r10, nat); + copy_reg(&ms->pmsa_gr[11-1], ms->pmsa_nat_bits, ®s->r11, nat); + copy_reg(&ms->pmsa_gr[12-1], ms->pmsa_nat_bits, ®s->r12, nat); + copy_reg(&ms->pmsa_gr[13-1], ms->pmsa_nat_bits, ®s->r13, nat); + copy_reg(&ms->pmsa_gr[14-1], ms->pmsa_nat_bits, ®s->r14, nat); + copy_reg(&ms->pmsa_gr[15-1], ms->pmsa_nat_bits, ®s->r15, nat); + if (ia64_psr(regs)->bn) + bank = ms->pmsa_bank1_gr; + else + bank = ms->pmsa_bank0_gr; + copy_reg(&bank[16-16], ms->pmsa_nat_bits, ®s->r16, nat); + copy_reg(&bank[17-16], ms->pmsa_nat_bits, ®s->r17, nat); + copy_reg(&bank[18-16], ms->pmsa_nat_bits, ®s->r18, nat); + copy_reg(&bank[19-16], ms->pmsa_nat_bits, ®s->r19, nat); + copy_reg(&bank[20-16], ms->pmsa_nat_bits, ®s->r20, nat); + copy_reg(&bank[21-16], ms->pmsa_nat_bits, ®s->r21, nat); + copy_reg(&bank[22-16], ms->pmsa_nat_bits, ®s->r22, nat); + copy_reg(&bank[23-16], ms->pmsa_nat_bits, ®s->r23, nat); + copy_reg(&bank[24-16], ms->pmsa_nat_bits, ®s->r24, nat); + copy_reg(&bank[25-16], ms->pmsa_nat_bits, ®s->r25, nat); + copy_reg(&bank[26-16], ms->pmsa_nat_bits, ®s->r26, nat); + copy_reg(&bank[27-16], ms->pmsa_nat_bits, ®s->r27, nat); + copy_reg(&bank[28-16], ms->pmsa_nat_bits, ®s->r28, nat); + copy_reg(&bank[29-16], ms->pmsa_nat_bits, ®s->r29, nat); + copy_reg(&bank[30-16], ms->pmsa_nat_bits, ®s->r30, nat); + copy_reg(&bank[31-16], ms->pmsa_nat_bits, ®s->r31, nat); +} + +/* On entry to this routine, we are running on the per cpu stack, see + * mca_asm.h. The original stack has not been touched by this event. Some of + * the original stack's registers will be in the RBS on this stack. This stack + * also contains a partial pt_regs and switch_stack, the rest of the data is in + * PAL minstate. + * + * The first thing to do is modify the original stack to look like a blocked + * task so we can run backtrace on the original task. Also mark the per cpu + * stack as current to ensure that we use the correct task state, it also means + * that we can do backtrace on the MCA/INIT handler code itself. + */ + +static struct task_struct * +ia64_mca_modify_original_stack(struct pt_regs *regs, + const struct switch_stack *sw, + struct ia64_sal_os_state *sos, + const char *type) +{ + char *p; + ia64_va va; + extern char ia64_leave_kernel[]; /* Need asm address, not function descriptor */ + const struct pal_min_state_area *ms = sos->pal_min_state; + struct task_struct *previous_current; + struct pt_regs *old_regs; + struct switch_stack *old_sw; + unsigned size = sizeof(struct pt_regs) + + sizeof(struct switch_stack) + 16; + unsigned long *old_bspstore, *old_bsp; + unsigned long *new_bspstore, *new_bsp; + unsigned long old_unat, old_rnat, new_rnat, nat; + u64 slots, loadrs = regs->loadrs; + u64 r12 = ms->pmsa_gr[12-1], r13 = ms->pmsa_gr[13-1]; + u64 ar_bspstore = regs->ar_bspstore; + u64 ar_bsp = regs->ar_bspstore + (loadrs >> 16); + const char *msg; + int cpu = smp_processor_id(); + + previous_current = curr_task(cpu); + ia64_set_curr_task(cpu, current); + if ((p = strchr(current->comm, ' '))) + *p = '\0'; + + /* Best effort attempt to cope with MCA/INIT delivered while in + * physical mode. + */ + regs->cr_ipsr = ms->pmsa_ipsr; + if (ia64_psr(regs)->dt == 0) { + va.l = r12; + if (va.f.reg == 0) { + va.f.reg = 7; + r12 = va.l; + } + va.l = r13; + if (va.f.reg == 0) { + va.f.reg = 7; + r13 = va.l; + } + } + if (ia64_psr(regs)->rt == 0) { + va.l = ar_bspstore; + if (va.f.reg == 0) { + va.f.reg = 7; + ar_bspstore = va.l; + } + va.l = ar_bsp; + if (va.f.reg == 0) { + va.f.reg = 7; + ar_bsp = va.l; + } + } + + /* mca_asm.S ia64_old_stack() cannot assume that the dirty registers + * have been copied to the old stack, the old stack may fail the + * validation tests below. So ia64_old_stack() must restore the dirty + * registers from the new stack. The old and new bspstore probably + * have different alignments, so loadrs calculated on the old bsp + * cannot be used to restore from the new bsp. Calculate a suitable + * loadrs for the new stack and save it in the new pt_regs, where + * ia64_old_stack() can get it. + */ + old_bspstore = (unsigned long *)ar_bspstore; + old_bsp = (unsigned long *)ar_bsp; + slots = ia64_rse_num_regs(old_bspstore, old_bsp); + new_bspstore = (unsigned long *)((u64)current + IA64_RBS_OFFSET); + new_bsp = ia64_rse_skip_regs(new_bspstore, slots); + regs->loadrs = (new_bsp - new_bspstore) * 8 << 16; + + /* Verify the previous stack state before we change it */ + if (user_mode(regs)) { + msg = "occurred in user space"; + /* previous_current is guaranteed to be valid when the task was + * in user space, so ... + */ + ia64_mca_modify_comm(previous_current); + goto no_mod; + } + + if (r13 != sos->prev_IA64_KR_CURRENT) { + msg = "inconsistent previous current and r13"; + goto no_mod; + } + + if (!mca_recover_range(ms->pmsa_iip)) { + if ((r12 - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent r12 and r13"; + goto no_mod; + } + if ((ar_bspstore - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent ar.bspstore and r13"; + goto no_mod; + } + va.p = old_bspstore; + if (va.f.reg < 5) { + msg = "old_bspstore is in the wrong region"; + goto no_mod; + } + if ((ar_bsp - r13) >= KERNEL_STACK_SIZE) { + msg = "inconsistent ar.bsp and r13"; + goto no_mod; + } + size += (ia64_rse_skip_regs(old_bspstore, slots) - old_bspstore) * 8; + if (ar_bspstore + size > r12) { + msg = "no room for blocked state"; + goto no_mod; + } + } + + ia64_mca_modify_comm(previous_current); + + /* Make the original task look blocked. First stack a struct pt_regs, + * describing the state at the time of interrupt. mca_asm.S built a + * partial pt_regs, copy it and fill in the blanks using minstate. + */ + p = (char *)r12 - sizeof(*regs); + old_regs = (struct pt_regs *)p; + memcpy(old_regs, regs, sizeof(*regs)); + old_regs->loadrs = loadrs; + old_unat = old_regs->ar_unat; + finish_pt_regs(old_regs, sos, &old_unat); + + /* Next stack a struct switch_stack. mca_asm.S built a partial + * switch_stack, copy it and fill in the blanks using pt_regs and + * minstate. + * + * In the synthesized switch_stack, b0 points to ia64_leave_kernel, + * ar.pfs is set to 0. + * + * unwind.c::unw_unwind() does special processing for interrupt frames. + * It checks if the PRED_NON_SYSCALL predicate is set, if the predicate + * is clear then unw_unwind() does _not_ adjust bsp over pt_regs. Not + * that this is documented, of course. Set PRED_NON_SYSCALL in the + * switch_stack on the original stack so it will unwind correctly when + * unwind.c reads pt_regs. + * + * thread.ksp is updated to point to the synthesized switch_stack. + */ + p -= sizeof(struct switch_stack); + old_sw = (struct switch_stack *)p; + memcpy(old_sw, sw, sizeof(*sw)); + old_sw->caller_unat = old_unat; + old_sw->ar_fpsr = old_regs->ar_fpsr; + copy_reg(&ms->pmsa_gr[4-1], ms->pmsa_nat_bits, &old_sw->r4, &old_unat); + copy_reg(&ms->pmsa_gr[5-1], ms->pmsa_nat_bits, &old_sw->r5, &old_unat); + copy_reg(&ms->pmsa_gr[6-1], ms->pmsa_nat_bits, &old_sw->r6, &old_unat); + copy_reg(&ms->pmsa_gr[7-1], ms->pmsa_nat_bits, &old_sw->r7, &old_unat); + old_sw->b0 = (u64)ia64_leave_kernel; + old_sw->b1 = ms->pmsa_br1; + old_sw->ar_pfs = 0; + old_sw->ar_unat = old_unat; + old_sw->pr = old_regs->pr | (1UL << PRED_NON_SYSCALL); + previous_current->thread.ksp = (u64)p - 16; + + /* Finally copy the original stack's registers back to its RBS. + * Registers from ar.bspstore through ar.bsp at the time of the event + * are in the current RBS, copy them back to the original stack. The + * copy must be done register by register because the original bspstore + * and the current one have different alignments, so the saved RNAT + * data occurs at different places. + * + * mca_asm does cover, so the old_bsp already includes all registers at + * the time of MCA/INIT. It also does flushrs, so all registers before + * this function have been written to backing store on the MCA/INIT + * stack. + */ + new_rnat = ia64_get_rnat(ia64_rse_rnat_addr(new_bspstore)); + old_rnat = regs->ar_rnat; + while (slots--) { + if (ia64_rse_is_rnat_slot(new_bspstore)) { + new_rnat = ia64_get_rnat(new_bspstore++); + } + if (ia64_rse_is_rnat_slot(old_bspstore)) { + *old_bspstore++ = old_rnat; + old_rnat = 0; + } + nat = (new_rnat >> ia64_rse_slot_num(new_bspstore)) & 1UL; + old_rnat &= ~(1UL << ia64_rse_slot_num(old_bspstore)); + old_rnat |= (nat << ia64_rse_slot_num(old_bspstore)); + *old_bspstore++ = *new_bspstore++; + } + old_sw->ar_bspstore = (unsigned long)old_bspstore; + old_sw->ar_rnat = old_rnat; + + sos->prev_task = previous_current; + return previous_current; + +no_mod: + mprintk(KERN_INFO "cpu %d, %s %s, original stack not modified\n", + smp_processor_id(), type, msg); + old_unat = regs->ar_unat; + finish_pt_regs(regs, sos, &old_unat); + return previous_current; +} + +/* The monarch/slave interaction is based on monarch_cpu and requires that all + * slaves have entered rendezvous before the monarch leaves. If any cpu has + * not entered rendezvous yet then wait a bit. The assumption is that any + * slave that has not rendezvoused after a reasonable time is never going to do + * so. In this context, slave includes cpus that respond to the MCA rendezvous + * interrupt, as well as cpus that receive the INIT slave event. + */ + +static void +ia64_wait_for_slaves(int monarch, const char *type) +{ + int c, i , wait; + + /* + * wait 5 seconds total for slaves (arbitrary) + */ + for (i = 0; i < 5000; i++) { + wait = 0; + for_each_online_cpu(c) { + if (c == monarch) + continue; + if (ia64_mc_info.imi_rendez_checkin[c] + == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) { + udelay(1000); /* short wait */ + wait = 1; + break; + } + } + if (!wait) + goto all_in; + } + + /* + * Maybe slave(s) dead. Print buffered messages immediately. + */ + ia64_mlogbuf_finish(0); + mprintk(KERN_INFO "OS %s slave did not rendezvous on cpu", type); + for_each_online_cpu(c) { + if (c == monarch) + continue; + if (ia64_mc_info.imi_rendez_checkin[c] == IA64_MCA_RENDEZ_CHECKIN_NOTDONE) + mprintk(" %d", c); + } + mprintk("\n"); + return; + +all_in: + mprintk(KERN_INFO "All OS %s slaves have reached rendezvous\n", type); + return; +} + +/* mca_insert_tr + * + * Switch rid when TR reload and needed! + * iord: 1: itr, 2: itr; + * +*/ +static void mca_insert_tr(u64 iord) +{ + + int i; + u64 old_rr; + struct ia64_tr_entry *p; + unsigned long psr; + int cpu = smp_processor_id(); + + if (!ia64_idtrs[cpu]) + return; + + psr = ia64_clear_ic(); + for (i = IA64_TR_ALLOC_BASE; i < IA64_TR_ALLOC_MAX; i++) { + p = ia64_idtrs[cpu] + (iord - 1) * IA64_TR_ALLOC_MAX; + if (p->pte & 0x1) { + old_rr = ia64_get_rr(p->ifa); + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, p->rr); + ia64_srlz_d(); + } + ia64_ptr(iord, p->ifa, p->itir >> 2); + ia64_srlz_i(); + if (iord & 0x1) { + ia64_itr(0x1, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (iord & 0x2) { + ia64_itr(0x2, i, p->ifa, p->pte, p->itir >> 2); + ia64_srlz_i(); + } + if (old_rr != p->rr) { + ia64_set_rr(p->ifa, old_rr); + ia64_srlz_d(); + } + } + } + ia64_set_psr(psr); +} + +/* + * ia64_mca_handler + * + * This is uncorrectable machine check handler called from OS_MCA + * dispatch code which is in turn called from SAL_CHECK(). + * This is the place where the core of OS MCA handling is done. + * Right now the logs are extracted and displayed in a well-defined + * format. This handler code is supposed to be run only on the + * monarch processor. Once the monarch is done with MCA handling + * further MCA logging is enabled by clearing logs. + * Monarch also has the duty of sending wakeup-IPIs to pull the + * slave processors out of rendezvous spinloop. + * + * If multiple processors call into OS_MCA, the first will become + * the monarch. Subsequent cpus will be recorded in the mca_cpu + * bitmask. After the first monarch has processed its MCA, it + * will wake up the next cpu in the mca_cpu bitmask and then go + * into the rendezvous loop. When all processors have serviced + * their MCA, the last monarch frees up the rest of the processors. + */ +void +ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, + struct ia64_sal_os_state *sos) +{ + int recover, cpu = smp_processor_id(); + struct task_struct *previous_current; + struct ia64_mca_notify_die nd = + { .sos = sos, .monarch_cpu = &monarch_cpu, .data = &recover }; + static atomic_t mca_count; + static cpumask_t mca_cpu; + + if (atomic_add_return(1, &mca_count) == 1) { + monarch_cpu = cpu; + sos->monarch = 1; + } else { + cpumask_set_cpu(cpu, &mca_cpu); + sos->monarch = 0; + } + mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " + "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); + + previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); + + NOTIFY_MCA(DIE_MCA_MONARCH_ENTER, regs, (long)&nd, 1); + + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA; + if (sos->monarch) { + ia64_wait_for_slaves(cpu, "MCA"); + + /* Wakeup all the processors which are spinning in the + * rendezvous loop. They will leave SAL, then spin in the OS + * with interrupts disabled until this monarch cpu leaves the + * MCA handler. That gets control back to the OS so we can + * backtrace the other cpus, backtrace when spinning in SAL + * does not work. + */ + ia64_mca_wakeup_all(); + } else { + while (cpumask_test_cpu(cpu, &mca_cpu)) + cpu_relax(); /* spin until monarch wakes us */ + } + + NOTIFY_MCA(DIE_MCA_MONARCH_PROCESS, regs, (long)&nd, 1); + + /* Get the MCA error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_MCA); + + /* MCA error recovery */ + recover = (ia64_mca_ucmc_extension + && ia64_mca_ucmc_extension( + IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA), + sos)); + + if (recover) { + sal_log_record_header_t *rh = IA64_LOG_CURR_BUFFER(SAL_INFO_TYPE_MCA); + rh->severity = sal_log_severity_corrected; + ia64_sal_clear_state_info(SAL_INFO_TYPE_MCA); + sos->os_status = IA64_MCA_CORRECTED; + } else { + /* Dump buffered message to console */ + ia64_mlogbuf_finish(1); + } + + if (__this_cpu_read(ia64_mca_tr_reload)) { + mca_insert_tr(0x1); /*Reload dynamic itrs*/ + mca_insert_tr(0x2); /*Reload dynamic itrs*/ + } + + NOTIFY_MCA(DIE_MCA_MONARCH_LEAVE, regs, (long)&nd, 1); + + if (atomic_dec_return(&mca_count) > 0) { + int i; + + /* wake up the next monarch cpu, + * and put this cpu in the rendez loop. + */ + for_each_online_cpu(i) { + if (cpumask_test_cpu(i, &mca_cpu)) { + monarch_cpu = i; + cpumask_clear_cpu(i, &mca_cpu); /* wake next cpu */ + while (monarch_cpu != -1) + cpu_relax(); /* spin until last cpu leaves */ + ia64_set_curr_task(cpu, previous_current); + ia64_mc_info.imi_rendez_checkin[cpu] + = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + return; + } + } + } + ia64_set_curr_task(cpu, previous_current); + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + monarch_cpu = -1; /* This frees the slaves and previous monarchs */ +} + +static DECLARE_WORK(cmc_disable_work, ia64_mca_cmc_vector_disable_keventd); +static DECLARE_WORK(cmc_enable_work, ia64_mca_cmc_vector_enable_keventd); + +/* + * ia64_mca_cmc_int_handler + * + * This is corrected machine check interrupt handler. + * Right now the logs are extracted and displayed in a well-defined + * format. + * + * Inputs + * interrupt number + * client data arg ptr + * + * Outputs + * None + */ +static irqreturn_t +ia64_mca_cmc_int_handler(int cmc_irq, void *arg) +{ + static unsigned long cmc_history[CMC_HISTORY_LENGTH]; + static int index; + static DEFINE_SPINLOCK(cmc_history_lock); + + IA64_MCA_DEBUG("%s: received interrupt vector = %#x on CPU %d\n", + __func__, cmc_irq, smp_processor_id()); + + /* SAL spec states this should run w/ interrupts enabled */ + local_irq_enable(); + + spin_lock(&cmc_history_lock); + if (!cmc_polling_enabled) { + int i, count = 1; /* we know 1 happened now */ + unsigned long now = jiffies; + + for (i = 0; i < CMC_HISTORY_LENGTH; i++) { + if (now - cmc_history[i] <= HZ) + count++; + } + + IA64_MCA_DEBUG(KERN_INFO "CMC threshold %d/%d\n", count, CMC_HISTORY_LENGTH); + if (count >= CMC_HISTORY_LENGTH) { + + cmc_polling_enabled = 1; + spin_unlock(&cmc_history_lock); + /* If we're being hit with CMC interrupts, we won't + * ever execute the schedule_work() below. Need to + * disable CMC interrupts on this processor now. + */ + ia64_mca_cmc_vector_disable(NULL); + schedule_work(&cmc_disable_work); + + /* + * Corrected errors will still be corrected, but + * make sure there's a log somewhere that indicates + * something is generating more than we can handle. + */ + printk(KERN_WARNING "WARNING: Switching to polling CMC handler; error records may be lost\n"); + + mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); + + /* lock already released, get out now */ + goto out; + } else { + cmc_history[index++] = now; + if (index == CMC_HISTORY_LENGTH) + index = 0; + } + } + spin_unlock(&cmc_history_lock); +out: + /* Get the CMC error record and log it */ + ia64_mca_log_sal_error_record(SAL_INFO_TYPE_CMC); + + local_irq_disable(); + + return IRQ_HANDLED; +} + +/* + * ia64_mca_cmc_int_caller + * + * Triggered by sw interrupt from CMC polling routine. Calls + * real interrupt handler and either triggers a sw interrupt + * on the next cpu or does cleanup at the end. + * + * Inputs + * interrupt number + * client data arg ptr + * Outputs + * handled + */ +static irqreturn_t +ia64_mca_cmc_int_caller(int cmc_irq, void *arg) +{ + static int start_count = -1; + unsigned int cpuid; + + cpuid = smp_processor_id(); + + /* If first cpu, update count */ + if (start_count == -1) + start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CMC); + + ia64_mca_cmc_int_handler(cmc_irq, arg); + + cpuid = cpumask_next(cpuid+1, cpu_online_mask); + + if (cpuid < nr_cpu_ids) { + ia64_send_ipi(cpuid, IA64_CMCP_VECTOR, IA64_IPI_DM_INT, 0); + } else { + /* If no log record, switch out of polling mode */ + if (start_count == IA64_LOG_COUNT(SAL_INFO_TYPE_CMC)) { + + printk(KERN_WARNING "Returning to interrupt driven CMC handler\n"); + schedule_work(&cmc_enable_work); + cmc_polling_enabled = 0; + + } else { + + mod_timer(&cmc_poll_timer, jiffies + CMC_POLL_INTERVAL); + } + + start_count = -1; + } + + return IRQ_HANDLED; +} + +/* + * ia64_mca_cmc_poll + * + * Poll for Corrected Machine Checks (CMCs) + * + * Inputs : dummy(unused) + * Outputs : None + * + */ +static void +ia64_mca_cmc_poll (struct timer_list *unused) +{ + /* Trigger a CMC interrupt cascade */ + ia64_send_ipi(cpumask_first(cpu_online_mask), IA64_CMCP_VECTOR, + IA64_IPI_DM_INT, 0); +} + +/* + * ia64_mca_cpe_int_caller + * + * Triggered by sw interrupt from CPE polling routine. Calls + * real interrupt handler and either triggers a sw interrupt + * on the next cpu or does cleanup at the end. + * + * Inputs + * interrupt number + * client data arg ptr + * Outputs + * handled + */ +static irqreturn_t +ia64_mca_cpe_int_caller(int cpe_irq, void *arg) +{ + static int start_count = -1; + static int poll_time = MIN_CPE_POLL_INTERVAL; + unsigned int cpuid; + + cpuid = smp_processor_id(); + + /* If first cpu, update count */ + if (start_count == -1) + start_count = IA64_LOG_COUNT(SAL_INFO_TYPE_CPE); + + ia64_mca_cpe_int_handler(cpe_irq, arg); + + cpuid = cpumask_next(cpuid+1, cpu_online_mask); + + if (cpuid < NR_CPUS) { + ia64_send_ipi(cpuid, IA64_CPEP_VECTOR, IA64_IPI_DM_INT, 0); + } else { + /* + * If a log was recorded, increase our polling frequency, + * otherwise, backoff or return to interrupt mode. + */ + if (start_count != IA64_LOG_COUNT(SAL_INFO_TYPE_CPE)) { + poll_time = max(MIN_CPE_POLL_INTERVAL, poll_time / 2); + } else if (cpe_vector < 0) { + poll_time = min(MAX_CPE_POLL_INTERVAL, poll_time * 2); + } else { + poll_time = MIN_CPE_POLL_INTERVAL; + + printk(KERN_WARNING "Returning to interrupt driven CPE handler\n"); + enable_irq(local_vector_to_irq(IA64_CPE_VECTOR)); + cpe_poll_enabled = 0; + } + + if (cpe_poll_enabled) + mod_timer(&cpe_poll_timer, jiffies + poll_time); + start_count = -1; + } + + return IRQ_HANDLED; +} + +/* + * ia64_mca_cpe_poll + * + * Poll for Corrected Platform Errors (CPEs), trigger interrupt + * on first cpu, from there it will trickle through all the cpus. + * + * Inputs : dummy(unused) + * Outputs : None + * + */ +static void +ia64_mca_cpe_poll (struct timer_list *unused) +{ + /* Trigger a CPE interrupt cascade */ + ia64_send_ipi(cpumask_first(cpu_online_mask), IA64_CPEP_VECTOR, + IA64_IPI_DM_INT, 0); +} + +static int +default_monarch_init_process(struct notifier_block *self, unsigned long val, void *data) +{ + int c; + struct task_struct *g, *t; + if (val != DIE_INIT_MONARCH_PROCESS) + return NOTIFY_DONE; +#ifdef CONFIG_KEXEC + if (atomic_read(&kdump_in_progress)) + return NOTIFY_DONE; +#endif + + /* + * FIXME: mlogbuf will brim over with INIT stack dumps. + * To enable show_stack from INIT, we use oops_in_progress which should + * be used in real oops. This would cause something wrong after INIT. + */ + BREAK_LOGLEVEL(console_loglevel); + ia64_mlogbuf_dump_from_init(); + + printk(KERN_ERR "Processes interrupted by INIT -"); + for_each_online_cpu(c) { + struct ia64_sal_os_state *s; + t = __va(__per_cpu_mca[c] + IA64_MCA_CPU_INIT_STACK_OFFSET); + s = (struct ia64_sal_os_state *)((char *)t + MCA_SOS_OFFSET); + g = s->prev_task; + if (g) { + if (g->pid) + printk(" %d", g->pid); + else + printk(" %d (cpu %d task 0x%p)", g->pid, task_cpu(g), g); + } + } + printk("\n\n"); + if (read_trylock(&tasklist_lock)) { + for_each_process_thread(g, t) { + printk("\nBacktrace of pid %d (%s)\n", t->pid, t->comm); + show_stack(t, NULL, KERN_DEFAULT); + } + read_unlock(&tasklist_lock); + } + /* FIXME: This will not restore zapped printk locks. */ + RESTORE_LOGLEVEL(console_loglevel); + return NOTIFY_DONE; +} + +/* + * C portion of the OS INIT handler + * + * Called from ia64_os_init_dispatch + * + * Inputs: pointer to pt_regs where processor info was saved. SAL/OS state for + * this event. This code is used for both monarch and slave INIT events, see + * sos->monarch. + * + * All INIT events switch to the INIT stack and change the previous process to + * blocked status. If one of the INIT events is the monarch then we are + * probably processing the nmi button/command. Use the monarch cpu to dump all + * the processes. The slave INIT events all spin until the monarch cpu + * returns. We can also get INIT slave events for MCA, in which case the MCA + * process is the monarch. + */ + +void +ia64_init_handler(struct pt_regs *regs, struct switch_stack *sw, + struct ia64_sal_os_state *sos) +{ + static atomic_t slaves; + static atomic_t monarchs; + struct task_struct *previous_current; + int cpu = smp_processor_id(); + struct ia64_mca_notify_die nd = + { .sos = sos, .monarch_cpu = &monarch_cpu }; + + NOTIFY_INIT(DIE_INIT_ENTER, regs, (long)&nd, 0); + + mprintk(KERN_INFO "Entered OS INIT handler. PSP=%lx cpu=%d monarch=%ld\n", + sos->proc_state_param, cpu, sos->monarch); + salinfo_log_wakeup(SAL_INFO_TYPE_INIT, NULL, 0, 0); + + previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "INIT"); + sos->os_status = IA64_INIT_RESUME; + + /* FIXME: Workaround for broken proms that drive all INIT events as + * slaves. The last slave that enters is promoted to be a monarch. + * Remove this code in September 2006, that gives platforms a year to + * fix their proms and get their customers updated. + */ + if (!sos->monarch && atomic_add_return(1, &slaves) == num_online_cpus()) { + mprintk(KERN_WARNING "%s: Promoting cpu %d to monarch.\n", + __func__, cpu); + atomic_dec(&slaves); + sos->monarch = 1; + } + + /* FIXME: Workaround for broken proms that drive all INIT events as + * monarchs. Second and subsequent monarchs are demoted to slaves. + * Remove this code in September 2006, that gives platforms a year to + * fix their proms and get their customers updated. + */ + if (sos->monarch && atomic_add_return(1, &monarchs) > 1) { + mprintk(KERN_WARNING "%s: Demoting cpu %d to slave.\n", + __func__, cpu); + atomic_dec(&monarchs); + sos->monarch = 0; + } + + if (!sos->monarch) { + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_INIT; + +#ifdef CONFIG_KEXEC + while (monarch_cpu == -1 && !atomic_read(&kdump_in_progress)) + udelay(1000); +#else + while (monarch_cpu == -1) + cpu_relax(); /* spin until monarch enters */ +#endif + + NOTIFY_INIT(DIE_INIT_SLAVE_ENTER, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_SLAVE_PROCESS, regs, (long)&nd, 1); + +#ifdef CONFIG_KEXEC + while (monarch_cpu != -1 && !atomic_read(&kdump_in_progress)) + udelay(1000); +#else + while (monarch_cpu != -1) + cpu_relax(); /* spin until monarch leaves */ +#endif + + NOTIFY_INIT(DIE_INIT_SLAVE_LEAVE, regs, (long)&nd, 1); + + mprintk("Slave on cpu %d returning to normal service.\n", cpu); + ia64_set_curr_task(cpu, previous_current); + ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + atomic_dec(&slaves); + return; + } + + monarch_cpu = cpu; + NOTIFY_INIT(DIE_INIT_MONARCH_ENTER, regs, (long)&nd, 1); + + /* + * Wait for a bit. On some machines (e.g., HP's zx2000 and zx6000, INIT can be + * generated via the BMC's command-line interface, but since the console is on the + * same serial line, the user will need some time to switch out of the BMC before + * the dump begins. + */ + mprintk("Delaying for 5 seconds...\n"); + udelay(5*1000000); + ia64_wait_for_slaves(cpu, "INIT"); + /* If nobody intercepts DIE_INIT_MONARCH_PROCESS then we drop through + * to default_monarch_init_process() above and just print all the + * tasks. + */ + NOTIFY_INIT(DIE_INIT_MONARCH_PROCESS, regs, (long)&nd, 1); + NOTIFY_INIT(DIE_INIT_MONARCH_LEAVE, regs, (long)&nd, 1); + + mprintk("\nINIT dump complete. Monarch on cpu %d returning to normal service.\n", cpu); + atomic_dec(&monarchs); + ia64_set_curr_task(cpu, previous_current); + monarch_cpu = -1; + return; +} + +static int __init +ia64_mca_disable_cpe_polling(char *str) +{ + cpe_poll_enabled = 0; + return 1; +} + +__setup("disable_cpe_poll", ia64_mca_disable_cpe_polling); + +/* Minimal format of the MCA/INIT stacks. The pseudo processes that run on + * these stacks can never sleep, they cannot return from the kernel to user + * space, they do not appear in a normal ps listing. So there is no need to + * format most of the fields. + */ + +static void +format_mca_init_stack(void *mca_data, unsigned long offset, + const char *type, int cpu) +{ + struct task_struct *p = (struct task_struct *)((char *)mca_data + offset); + struct thread_info *ti; + memset(p, 0, KERNEL_STACK_SIZE); + ti = task_thread_info(p); + ti->flags = _TIF_MCA_INIT; + ti->preempt_count = 1; + ti->task = p; + ti->cpu = cpu; + p->stack = ti; + p->__state = TASK_UNINTERRUPTIBLE; + cpumask_set_cpu(cpu, &p->cpus_mask); + INIT_LIST_HEAD(&p->tasks); + p->parent = p->real_parent = p->group_leader = p; + INIT_LIST_HEAD(&p->children); + INIT_LIST_HEAD(&p->sibling); + strscpy(p->comm, type, sizeof(p->comm)-1); +} + +/* Caller prevents this from being called after init */ +static void * __ref mca_bootmem(void) +{ + return memblock_alloc(sizeof(struct ia64_mca_cpu), KERNEL_STACK_SIZE); +} + +/* Do per-CPU MCA-related initialization. */ +void +ia64_mca_cpu_init(void *cpu_data) +{ + void *pal_vaddr; + void *data; + long sz = sizeof(struct ia64_mca_cpu); + int cpu = smp_processor_id(); + static int first_time = 1; + + /* + * Structure will already be allocated if cpu has been online, + * then offlined. + */ + if (__per_cpu_mca[cpu]) { + data = __va(__per_cpu_mca[cpu]); + } else { + if (first_time) { + data = mca_bootmem(); + first_time = 0; + } else + data = (void *)__get_free_pages(GFP_ATOMIC, + get_order(sz)); + if (!data) + panic("Could not allocate MCA memory for cpu %d\n", + cpu); + } + format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, mca_stack), + "MCA", cpu); + format_mca_init_stack(data, offsetof(struct ia64_mca_cpu, init_stack), + "INIT", cpu); + __this_cpu_write(ia64_mca_data, (__per_cpu_mca[cpu] = __pa(data))); + + /* + * Stash away a copy of the PTE needed to map the per-CPU page. + * We may need it during MCA recovery. + */ + __this_cpu_write(ia64_mca_per_cpu_pte, + pte_val(mk_pte_phys(__pa(cpu_data), PAGE_KERNEL))); + + /* + * Also, stash away a copy of the PAL address and the PTE + * needed to map it. + */ + pal_vaddr = efi_get_pal_addr(); + if (!pal_vaddr) + return; + __this_cpu_write(ia64_mca_pal_base, + GRANULEROUNDDOWN((unsigned long) pal_vaddr)); + __this_cpu_write(ia64_mca_pal_pte, pte_val(mk_pte_phys(__pa(pal_vaddr), + PAGE_KERNEL))); +} + +static int ia64_mca_cpu_online(unsigned int cpu) +{ + unsigned long flags; + + local_irq_save(flags); + if (!cmc_polling_enabled) + ia64_mca_cmc_vector_enable(NULL); + local_irq_restore(flags); + return 0; +} + +/* + * ia64_mca_init + * + * Do all the system level mca specific initialization. + * + * 1. Register spinloop and wakeup request interrupt vectors + * + * 2. Register OS_MCA handler entry point + * + * 3. Register OS_INIT handler entry point + * + * 4. Initialize MCA/CMC/INIT related log buffers maintained by the OS. + * + * Note that this initialization is done very early before some kernel + * services are available. + * + * Inputs : None + * + * Outputs : None + */ +void __init +ia64_mca_init(void) +{ + ia64_fptr_t *init_hldlr_ptr_monarch = (ia64_fptr_t *)ia64_os_init_dispatch_monarch; + ia64_fptr_t *init_hldlr_ptr_slave = (ia64_fptr_t *)ia64_os_init_dispatch_slave; + ia64_fptr_t *mca_hldlr_ptr = (ia64_fptr_t *)ia64_os_mca_dispatch; + int i; + long rc; + struct ia64_sal_retval isrv; + unsigned long timeout = IA64_MCA_RENDEZ_TIMEOUT; /* platform specific */ + static struct notifier_block default_init_monarch_nb = { + .notifier_call = default_monarch_init_process, + .priority = 0/* we need to notified last */ + }; + + IA64_MCA_DEBUG("%s: begin\n", __func__); + + /* Clear the Rendez checkin flag for all cpus */ + for(i = 0 ; i < NR_CPUS; i++) + ia64_mc_info.imi_rendez_checkin[i] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE; + + /* + * Register the rendezvous spinloop and wakeup mechanism with SAL + */ + + /* Register the rendezvous interrupt vector with SAL */ + while (1) { + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_INT, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_RENDEZ_VECTOR, + timeout, + SAL_MC_PARAM_RZ_ALWAYS); + rc = isrv.status; + if (rc == 0) + break; + if (rc == -2) { + printk(KERN_INFO "Increasing MCA rendezvous timeout from " + "%ld to %ld milliseconds\n", timeout, isrv.v0); + timeout = isrv.v0; + NOTIFY_MCA(DIE_MCA_NEW_TIMEOUT, NULL, timeout, 0); + continue; + } + printk(KERN_ERR "Failed to register rendezvous interrupt " + "with SAL (status %ld)\n", rc); + return; + } + + /* Register the wakeup interrupt vector with SAL */ + isrv = ia64_sal_mc_set_params(SAL_MC_PARAM_RENDEZ_WAKEUP, + SAL_MC_PARAM_MECHANISM_INT, + IA64_MCA_WAKEUP_VECTOR, + 0, 0); + rc = isrv.status; + if (rc) { + printk(KERN_ERR "Failed to register wakeup interrupt with SAL " + "(status %ld)\n", rc); + return; + } + + IA64_MCA_DEBUG("%s: registered MCA rendezvous spinloop and wakeup mech.\n", __func__); + + ia64_mc_info.imi_mca_handler = ia64_tpa(mca_hldlr_ptr->fp); + /* + * XXX - disable SAL checksum by setting size to 0; should be + * ia64_tpa(ia64_os_mca_dispatch_end) - ia64_tpa(ia64_os_mca_dispatch); + */ + ia64_mc_info.imi_mca_handler_size = 0; + + /* Register the os mca handler with SAL */ + if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_MCA, + ia64_mc_info.imi_mca_handler, + ia64_tpa(mca_hldlr_ptr->gp), + ia64_mc_info.imi_mca_handler_size, + 0, 0, 0))) + { + printk(KERN_ERR "Failed to register OS MCA handler with SAL " + "(status %ld)\n", rc); + return; + } + + IA64_MCA_DEBUG("%s: registered OS MCA handler with SAL at 0x%lx, gp = 0x%lx\n", __func__, + ia64_mc_info.imi_mca_handler, ia64_tpa(mca_hldlr_ptr->gp)); + + /* + * XXX - disable SAL checksum by setting size to 0, should be + * size of the actual init handler in mca_asm.S. + */ + ia64_mc_info.imi_monarch_init_handler = ia64_tpa(init_hldlr_ptr_monarch->fp); + ia64_mc_info.imi_monarch_init_handler_size = 0; + ia64_mc_info.imi_slave_init_handler = ia64_tpa(init_hldlr_ptr_slave->fp); + ia64_mc_info.imi_slave_init_handler_size = 0; + + IA64_MCA_DEBUG("%s: OS INIT handler at %lx\n", __func__, + ia64_mc_info.imi_monarch_init_handler); + + /* Register the os init handler with SAL */ + if ((rc = ia64_sal_set_vectors(SAL_VECTOR_OS_INIT, + ia64_mc_info.imi_monarch_init_handler, + ia64_tpa(ia64_getreg(_IA64_REG_GP)), + ia64_mc_info.imi_monarch_init_handler_size, + ia64_mc_info.imi_slave_init_handler, + ia64_tpa(ia64_getreg(_IA64_REG_GP)), + ia64_mc_info.imi_slave_init_handler_size))) + { + printk(KERN_ERR "Failed to register m/s INIT handlers with SAL " + "(status %ld)\n", rc); + return; + } + if (register_die_notifier(&default_init_monarch_nb)) { + printk(KERN_ERR "Failed to register default monarch INIT process\n"); + return; + } + + IA64_MCA_DEBUG("%s: registered OS INIT handler with SAL\n", __func__); + + /* Initialize the areas set aside by the OS to buffer the + * platform/processor error states for MCA/INIT/CMC + * handling. + */ + ia64_log_init(SAL_INFO_TYPE_MCA); + ia64_log_init(SAL_INFO_TYPE_INIT); + ia64_log_init(SAL_INFO_TYPE_CMC); + ia64_log_init(SAL_INFO_TYPE_CPE); + + mca_init = 1; + printk(KERN_INFO "MCA related initialization done\n"); +} + + +/* + * These pieces cannot be done in ia64_mca_init() because it is called before + * early_irq_init() which would wipe out our percpu irq registrations. But we + * cannot leave them until ia64_mca_late_init() because by then all the other + * processors have been brought online and have set their own CMC vectors to + * point at a non-existant action. Called from arch_early_irq_init(). + */ +void __init ia64_mca_irq_init(void) +{ + /* + * Configure the CMCI/P vector and handler. Interrupts for CMC are + * per-processor, so AP CMC interrupts are setup in smp_callin() (smpboot.c). + */ + register_percpu_irq(IA64_CMC_VECTOR, ia64_mca_cmc_int_handler, 0, + "cmc_hndlr"); + register_percpu_irq(IA64_CMCP_VECTOR, ia64_mca_cmc_int_caller, 0, + "cmc_poll"); + ia64_mca_cmc_vector_setup(); /* Setup vector on BSP */ + + /* Setup the MCA rendezvous interrupt vector */ + register_percpu_irq(IA64_MCA_RENDEZ_VECTOR, ia64_mca_rendez_int_handler, + 0, "mca_rdzv"); + + /* Setup the MCA wakeup interrupt vector */ + register_percpu_irq(IA64_MCA_WAKEUP_VECTOR, ia64_mca_wakeup_int_handler, + 0, "mca_wkup"); + + /* Setup the CPEI/P handler */ + register_percpu_irq(IA64_CPEP_VECTOR, ia64_mca_cpe_int_caller, 0, + "cpe_poll"); +} + +/* + * ia64_mca_late_init + * + * Opportunity to setup things that require initialization later + * than ia64_mca_init. Setup a timer to poll for CPEs if the + * platform doesn't support an interrupt driven mechanism. + * + * Inputs : None + * Outputs : Status + */ +static int __init +ia64_mca_late_init(void) +{ + if (!mca_init) + return 0; + + /* Setup the CMCI/P vector and handler */ + timer_setup(&cmc_poll_timer, ia64_mca_cmc_poll, 0); + + /* Unmask/enable the vector */ + cmc_polling_enabled = 0; + cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/mca:online", + ia64_mca_cpu_online, NULL); + IA64_MCA_DEBUG("%s: CMCI/P setup and enabled.\n", __func__); + + /* Setup the CPEI/P vector and handler */ + cpe_vector = acpi_request_vector(ACPI_INTERRUPT_CPEI); + timer_setup(&cpe_poll_timer, ia64_mca_cpe_poll, 0); + + { + unsigned int irq; + + if (cpe_vector >= 0) { + /* If platform supports CPEI, enable the irq. */ + irq = local_vector_to_irq(cpe_vector); + if (irq > 0) { + cpe_poll_enabled = 0; + irq_set_status_flags(irq, IRQ_PER_CPU); + if (request_irq(irq, ia64_mca_cpe_int_handler, + 0, "cpe_hndlr", NULL)) + pr_err("Failed to register cpe_hndlr interrupt\n"); + ia64_cpe_irq = irq; + ia64_mca_register_cpev(cpe_vector); + IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", + __func__); + return 0; + } + printk(KERN_ERR "%s: Failed to find irq for CPE " + "interrupt handler, vector %d\n", + __func__, cpe_vector); + } + /* If platform doesn't support CPEI, get the timer going. */ + if (cpe_poll_enabled) { + ia64_mca_cpe_poll(0UL); + IA64_MCA_DEBUG("%s: CPEP setup and enabled.\n", __func__); + } + } + + return 0; +} + +device_initcall(ia64_mca_late_init); diff --git a/arch/ia64/kernel/mca_asm.S b/arch/ia64/kernel/mca_asm.S new file mode 100644 index 000000000000..0d6b8cf9d1d0 --- /dev/null +++ b/arch/ia64/kernel/mca_asm.S @@ -0,0 +1,1123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * File: mca_asm.S + * Purpose: assembly portion of the IA64 MCA handling + * + * Mods by cfleck to integrate into kernel build + * + * 2000-03-15 David Mosberger-Tang + * Added various stop bits to get a clean compile + * + * 2000-03-29 Chuck Fleckenstein + * Added code to save INIT handoff state in pt_regs format, + * switch to temp kstack, switch modes, jump to C INIT handler + * + * 2002-01-04 J.Hall + * Before entering virtual mode code: + * 1. Check for TLB CPU error + * 2. Restore current thread pointer to kr6 + * 3. Move stack ptr 16 bytes to conform to C calling convention + * + * 2004-11-12 Russ Anderson + * Added per cpu MCA/INIT stack save areas. + * + * 2005-12-08 Keith Owens + * Use per cpu MCA/INIT stacks for all data. + */ +#include +#include + +#include +#include +#include +#include + +#include "entry.h" + +#define GET_IA64_MCA_DATA(reg) \ + GET_THIS_PADDR(reg, ia64_mca_data) \ + ;; \ + ld8 reg=[reg] + + .global ia64_do_tlb_purge + .global ia64_os_mca_dispatch + .global ia64_os_init_on_kdump + .global ia64_os_init_dispatch_monarch + .global ia64_os_init_dispatch_slave + + .text + .align 16 + +//StartMain//////////////////////////////////////////////////////////////////// + +/* + * Just the TLB purge part is moved to a separate function + * so we can re-use the code for cpu hotplug code as well + * Caller should now setup b1, so we can branch once the + * tlb flush is complete. + */ + +ia64_do_tlb_purge: +#define O(member) IA64_CPUINFO_##member##_OFFSET + + GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=0 + ;; + adds r20=-1,r20 + ;; +#undef O + +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i // srlz.i implies srlz.d + ;; + + // Now purge addresses formerly mapped by TR registers + // 1. Purge ITR&DTR for kernel. + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + srlz.d + ;; + // 3. Purge ITR for PAL code. + GET_THIS_PADDR(r2, ia64_mca_pal_base) + ;; + ld8 r16=[r2] + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + // 4. Purge DTR for stack. + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + // Now branch away to caller. + br.sptk.many b1 + ;; + +//EndMain////////////////////////////////////////////////////////////////////// + +//StartMain//////////////////////////////////////////////////////////////////// + +ia64_os_mca_dispatch: + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + mov r19=1 // All MCA events are treated as monarch (for now) + br.sptk ia64_state_save // save the state that is not in minstate +1: + + GET_IA64_MCA_DATA(r2) + // Using MCA stack, struct ia64_sal_os_state, variable proc_state_param + ;; + add r3=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET+SOS(PROC_STATE_PARAM), r2 + ;; + ld8 r18=[r3] // Get processor state parameter on existing PALE_CHECK. + ;; + tbit.nz p6,p7=r18,60 +(p7) br.spnt done_tlb_purge_and_reload + + // The following code purges TC and TR entries. Then reload all TC entries. + // Purge percpu data TC entries. +begin_tlb_purge_and_reload: + movl r18=ia64_reload_tr;; + LOAD_PHYSICAL(p0,r18,ia64_reload_tr);; + mov b1=r18;; + br.sptk.many ia64_do_tlb_purge;; + +ia64_reload_tr: + // Finally reload the TR registers. + // 1. Reload DTR/ITR registers for kernel. + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + movl r17=KERNEL_START + ;; + mov cr.itir=r18 + mov cr.ifa=r17 + mov r16=IA64_TR_KERNEL + mov r19=ip + movl r18=PAGE_KERNEL + ;; + dep r17=0,r19,0, KERNEL_TR_PAGE_SHIFT + ;; + or r18=r17,r18 + ;; + itr.i itr[r16]=r18 + ;; + itr.d dtr[r16]=r18 + ;; + srlz.i + srlz.d + ;; + // 3. Reload ITR for PAL code. + GET_THIS_PADDR(r2, ia64_mca_pal_pte) + ;; + ld8 r18=[r2] // load PAL PTE + ;; + GET_THIS_PADDR(r2, ia64_mca_pal_base) + ;; + ld8 r16=[r2] // load PAL vaddr + mov r19=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r19 + mov cr.ifa=r16 + mov r20=IA64_TR_PALCODE + ;; + itr.i itr[r20]=r18 + ;; + srlz.i + ;; + // 4. Reload DTR for stack. + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r18=r19,r16 + movl r20=PAGE_KERNEL + ;; + add r16=r20,r16 + mov r19=IA64_GRANULE_SHIFT<<2 + ;; + mov cr.itir=r19 + mov cr.ifa=r18 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r16 + GET_THIS_PADDR(r2, ia64_mca_tr_reload) + mov r18 = 1 + ;; + srlz.d + ;; + st8 [r2] =r18 + ;; + +done_tlb_purge_and_reload: + + // switch to per cpu MCA stack + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_new_stack +1: + + // everything saved, now we can set the kernel registers + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_set_kernel_registers +1: + + // This must be done in physical mode + GET_IA64_MCA_DATA(r2) + ;; + mov r7=r2 + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_begin, r4) + + // This code returns to SAL via SOS r2, in general SAL has no unwind + // data. To get a clean termination when backtracing the C MCA/INIT + // handler, set a dummy return address of 0 in this routine. That + // requires that ia64_os_mca_virtual_begin be a global function. +ENTRY(ia64_os_mca_virtual_begin) + .prologue + .save rp,r0 + .body + + mov ar.rsc=3 // set eager mode for C handler + mov r2=r7 // see GET_IA64_MCA_DATA above + ;; + + // Call virtual mode handler + alloc r14=ar.pfs,0,0,3,0 + ;; + DATA_PA_TO_VA(r2,r7) + ;; + add out0=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 + add out1=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 + add out2=IA64_MCA_CPU_MCA_STACK_OFFSET+MCA_SOS_OFFSET, r2 + br.call.sptk.many b0=ia64_mca_handler + + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_mca_virtual_end, r4) +ia64_os_mca_virtual_end: + +END(ia64_os_mca_virtual_begin) + + // switch back to previous stack + alloc r14=ar.pfs,0,0,0,0 // remove the MCA handler frame + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_old_stack +1: + + mov r3=IA64_MCA_CPU_MCA_STACK_OFFSET // use the MCA stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_state_restore // restore the SAL state +1: + + mov b0=r12 // SAL_CHECK return address + + br b0 + +//EndMain////////////////////////////////////////////////////////////////////// + +//StartMain//////////////////////////////////////////////////////////////////// + +// +// NOP init handler for kdump. In panic situation, we may receive INIT +// while kernel transition. Since we initialize registers on leave from +// current kernel, no longer monarch/slave handlers of current kernel in +// virtual mode are called safely. +// We can unregister these init handlers from SAL, however then the INIT +// will result in warmboot by SAL and we cannot retrieve the crashdump. +// Therefore register this NOP function to SAL, to prevent entering virtual +// mode and resulting warmboot by SAL. +// +ia64_os_init_on_kdump: + mov r8=r0 // IA64_INIT_RESUME + mov r9=r10 // SAL_GP + mov r22=r17 // *minstate + ;; + mov r10=r0 // return to same context + mov b0=r12 // SAL_CHECK return address + br b0 + +// +// SAL to OS entry point for INIT on all processors. This has been defined for +// registration purposes with SAL as a part of ia64_mca_init. Monarch and +// slave INIT have identical processing, except for the value of the +// sos->monarch flag in r19. +// + +ia64_os_init_dispatch_monarch: + mov r19=1 // Bow, bow, ye lower middle classes! + br.sptk ia64_os_init_dispatch + +ia64_os_init_dispatch_slave: + mov r19=0 // yeth, mathter + +ia64_os_init_dispatch: + + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_state_save // save the state that is not in minstate +1: + + // switch to per cpu INIT stack + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_new_stack +1: + + // everything saved, now we can set the kernel registers + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_set_kernel_registers +1: + + // This must be done in physical mode + GET_IA64_MCA_DATA(r2) + ;; + mov r7=r2 + + // Enter virtual mode from physical mode + VIRTUAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_begin, r4) + + // This code returns to SAL via SOS r2, in general SAL has no unwind + // data. To get a clean termination when backtracing the C MCA/INIT + // handler, set a dummy return address of 0 in this routine. That + // requires that ia64_os_init_virtual_begin be a global function. +ENTRY(ia64_os_init_virtual_begin) + .prologue + .save rp,r0 + .body + + mov ar.rsc=3 // set eager mode for C handler + mov r2=r7 // see GET_IA64_MCA_DATA above + ;; + + // Call virtual mode handler + alloc r14=ar.pfs,0,0,3,0 + ;; + DATA_PA_TO_VA(r2,r7) + ;; + add out0=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_PT_REGS_OFFSET, r2 + add out1=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SWITCH_STACK_OFFSET, r2 + add out2=IA64_MCA_CPU_INIT_STACK_OFFSET+MCA_SOS_OFFSET, r2 + br.call.sptk.many b0=ia64_init_handler + + // Revert back to physical mode before going back to SAL + PHYSICAL_MODE_ENTER(r2, r3, ia64_os_init_virtual_end, r4) +ia64_os_init_virtual_end: + +END(ia64_os_init_virtual_begin) + + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_state_restore // restore the SAL state +1: + + // switch back to previous stack + alloc r14=ar.pfs,0,0,0,0 // remove the INIT handler frame + mov r3=IA64_MCA_CPU_INIT_STACK_OFFSET // use the INIT stack + LOAD_PHYSICAL(p0,r2,1f) // return address + br.sptk ia64_old_stack +1: + + mov b0=r12 // SAL_CHECK return address + br b0 + +//EndMain////////////////////////////////////////////////////////////////////// + +// common defines for the stubs +#define ms r4 +#define regs r5 +#define temp1 r2 /* careful, it overlaps with input registers */ +#define temp2 r3 /* careful, it overlaps with input registers */ +#define temp3 r7 +#define temp4 r14 + + +//++ +// Name: +// ia64_state_save() +// +// Stub Description: +// +// Save the state that is not in minstate. This is sensitive to the layout of +// struct ia64_sal_os_state in mca.h. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// The OS to SAL section of struct ia64_sal_os_state is set to a default +// value of cold boot (MCA) or warm boot (INIT) and return to the same +// context. ia64_sal_os_state is also used to hold some registers that +// need to be saved and restored across the stack switches. +// +// Most input registers to this stub come from PAL/SAL +// r1 os gp, physical +// r8 pal_proc entry point +// r9 sal_proc entry point +// r10 sal gp +// r11 MCA - rendevzous state, INIT - reason code +// r12 sal return address +// r17 pal min_state +// r18 processor state parameter +// r19 monarch flag, set by the caller of this routine +// +// In addition to the SAL to OS state, this routine saves all the +// registers that appear in struct pt_regs and struct switch_stack, +// excluding those that are already in the PAL minstate area. This +// results in a partial pt_regs and switch_stack, the C code copies the +// remaining registers from PAL minstate to pt_regs and switch_stack. The +// resulting structures contain all the state of the original process when +// MCA/INIT occurred. +// +//-- + +ia64_state_save: + add regs=MCA_SOS_OFFSET, r3 + add ms=MCA_SOS_OFFSET+8, r3 + mov b0=r2 // save return address + cmp.eq p1,p2=IA64_MCA_CPU_MCA_STACK_OFFSET, r3 + ;; + GET_IA64_MCA_DATA(temp2) + ;; + add temp1=temp2, regs // struct ia64_sal_os_state on MCA or INIT stack + add temp2=temp2, ms // struct ia64_sal_os_state+8 on MCA or INIT stack + ;; + mov regs=temp1 // save the start of sos + st8 [temp1]=r1,16 // os_gp + st8 [temp2]=r8,16 // pal_proc + ;; + st8 [temp1]=r9,16 // sal_proc + st8 [temp2]=r11,16 // rv_rc + mov r11=cr.iipa + ;; + st8 [temp1]=r18 // proc_state_param + st8 [temp2]=r19 // monarch + mov r6=IA64_KR(CURRENT) + add temp1=SOS(SAL_RA), regs + add temp2=SOS(SAL_GP), regs + ;; + st8 [temp1]=r12,16 // sal_ra + st8 [temp2]=r10,16 // sal_gp + mov r12=cr.isr + ;; + st8 [temp1]=r17,16 // pal_min_state + st8 [temp2]=r6,16 // prev_IA64_KR_CURRENT + mov r6=IA64_KR(CURRENT_STACK) + ;; + st8 [temp1]=r6,16 // prev_IA64_KR_CURRENT_STACK + st8 [temp2]=r0,16 // prev_task, starts off as NULL + mov r6=cr.ifa + ;; + st8 [temp1]=r12,16 // cr.isr + st8 [temp2]=r6,16 // cr.ifa + mov r12=cr.itir + ;; + st8 [temp1]=r12,16 // cr.itir + st8 [temp2]=r11,16 // cr.iipa + mov r12=cr.iim + ;; + st8 [temp1]=r12 // cr.iim +(p1) mov r12=IA64_MCA_COLD_BOOT +(p2) mov r12=IA64_INIT_WARM_BOOT + mov r6=cr.iha + add temp1=SOS(OS_STATUS), regs + ;; + st8 [temp2]=r6 // cr.iha + add temp2=SOS(CONTEXT), regs + st8 [temp1]=r12 // os_status, default is cold boot + mov r6=IA64_MCA_SAME_CONTEXT + ;; + st8 [temp2]=r6 // context, default is same context + + // Save the pt_regs data that is not in minstate. The previous code + // left regs at sos. + add regs=MCA_PT_REGS_OFFSET-MCA_SOS_OFFSET, regs + ;; + add temp1=PT(B6), regs + mov temp3=b6 + mov temp4=b7 + add temp2=PT(B7), regs + ;; + st8 [temp1]=temp3,PT(AR_CSD)-PT(B6) // save b6 + st8 [temp2]=temp4,PT(AR_SSD)-PT(B7) // save b7 + mov temp3=ar.csd + mov temp4=ar.ssd + cover // must be last in group + ;; + st8 [temp1]=temp3,PT(AR_UNAT)-PT(AR_CSD) // save ar.csd + st8 [temp2]=temp4,PT(AR_PFS)-PT(AR_SSD) // save ar.ssd + mov temp3=ar.unat + mov temp4=ar.pfs + ;; + st8 [temp1]=temp3,PT(AR_RNAT)-PT(AR_UNAT) // save ar.unat + st8 [temp2]=temp4,PT(AR_BSPSTORE)-PT(AR_PFS) // save ar.pfs + mov temp3=ar.rnat + mov temp4=ar.bspstore + ;; + st8 [temp1]=temp3,PT(LOADRS)-PT(AR_RNAT) // save ar.rnat + st8 [temp2]=temp4,PT(AR_FPSR)-PT(AR_BSPSTORE) // save ar.bspstore + mov temp3=ar.bsp + ;; + sub temp3=temp3, temp4 // ar.bsp - ar.bspstore + mov temp4=ar.fpsr + ;; + shl temp3=temp3,16 // compute ar.rsc to be used for "loadrs" + ;; + st8 [temp1]=temp3,PT(AR_CCV)-PT(LOADRS) // save loadrs + st8 [temp2]=temp4,PT(F6)-PT(AR_FPSR) // save ar.fpsr + mov temp3=ar.ccv + ;; + st8 [temp1]=temp3,PT(F7)-PT(AR_CCV) // save ar.ccv + stf.spill [temp2]=f6,PT(F8)-PT(F6) + ;; + stf.spill [temp1]=f7,PT(F9)-PT(F7) + stf.spill [temp2]=f8,PT(F10)-PT(F8) + ;; + stf.spill [temp1]=f9,PT(F11)-PT(F9) + stf.spill [temp2]=f10 + ;; + stf.spill [temp1]=f11 + + // Save the switch_stack data that is not in minstate nor pt_regs. The + // previous code left regs at pt_regs. + add regs=MCA_SWITCH_STACK_OFFSET-MCA_PT_REGS_OFFSET, regs + ;; + add temp1=SW(F2), regs + add temp2=SW(F3), regs + ;; + stf.spill [temp1]=f2,32 + stf.spill [temp2]=f3,32 + ;; + stf.spill [temp1]=f4,32 + stf.spill [temp2]=f5,32 + ;; + stf.spill [temp1]=f12,32 + stf.spill [temp2]=f13,32 + ;; + stf.spill [temp1]=f14,32 + stf.spill [temp2]=f15,32 + ;; + stf.spill [temp1]=f16,32 + stf.spill [temp2]=f17,32 + ;; + stf.spill [temp1]=f18,32 + stf.spill [temp2]=f19,32 + ;; + stf.spill [temp1]=f20,32 + stf.spill [temp2]=f21,32 + ;; + stf.spill [temp1]=f22,32 + stf.spill [temp2]=f23,32 + ;; + stf.spill [temp1]=f24,32 + stf.spill [temp2]=f25,32 + ;; + stf.spill [temp1]=f26,32 + stf.spill [temp2]=f27,32 + ;; + stf.spill [temp1]=f28,32 + stf.spill [temp2]=f29,32 + ;; + stf.spill [temp1]=f30,SW(B2)-SW(F30) + stf.spill [temp2]=f31,SW(B3)-SW(F31) + mov temp3=b2 + mov temp4=b3 + ;; + st8 [temp1]=temp3,16 // save b2 + st8 [temp2]=temp4,16 // save b3 + mov temp3=b4 + mov temp4=b5 + ;; + st8 [temp1]=temp3,SW(AR_LC)-SW(B4) // save b4 + st8 [temp2]=temp4 // save b5 + mov temp3=ar.lc + ;; + st8 [temp1]=temp3 // save ar.lc + + // FIXME: Some proms are incorrectly accessing the minstate area as + // cached data. The C code uses region 6, uncached virtual. Ensure + // that there is no cache data lying around for the first 1K of the + // minstate area. + // Remove this code in September 2006, that gives platforms a year to + // fix their proms and get their customers updated. + + add r1=32*1,r17 + add r2=32*2,r17 + add r3=32*3,r17 + add r4=32*4,r17 + add r5=32*5,r17 + add r6=32*6,r17 + add r7=32*7,r17 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + add r17=32*8,r17 + add r1=32*8,r1 + add r2=32*8,r2 + add r3=32*8,r3 + add r4=32*8,r4 + add r5=32*8,r5 + add r6=32*8,r6 + add r7=32*8,r7 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + add r17=32*8,r17 + add r1=32*8,r1 + add r2=32*8,r2 + add r3=32*8,r3 + add r4=32*8,r4 + add r5=32*8,r5 + add r6=32*8,r6 + add r7=32*8,r7 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + add r17=32*8,r17 + add r1=32*8,r1 + add r2=32*8,r2 + add r3=32*8,r3 + add r4=32*8,r4 + add r5=32*8,r5 + add r6=32*8,r6 + add r7=32*8,r7 + ;; + fc r17 + fc r1 + fc r2 + fc r3 + fc r4 + fc r5 + fc r6 + fc r7 + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_state_restore() +// +// Stub Description: +// +// Restore the SAL/OS state. This is sensitive to the layout of struct +// ia64_sal_os_state in mca.h. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// In addition to the SAL to OS state, this routine restores all the +// registers that appear in struct pt_regs and struct switch_stack, +// excluding those in the PAL minstate area. +// +//-- + +ia64_state_restore: + // Restore the switch_stack data that is not in minstate nor pt_regs. + add regs=MCA_SWITCH_STACK_OFFSET, r3 + mov b0=r2 // save return address + ;; + GET_IA64_MCA_DATA(temp2) + ;; + add regs=temp2, regs + ;; + add temp1=SW(F2), regs + add temp2=SW(F3), regs + ;; + ldf.fill f2=[temp1],32 + ldf.fill f3=[temp2],32 + ;; + ldf.fill f4=[temp1],32 + ldf.fill f5=[temp2],32 + ;; + ldf.fill f12=[temp1],32 + ldf.fill f13=[temp2],32 + ;; + ldf.fill f14=[temp1],32 + ldf.fill f15=[temp2],32 + ;; + ldf.fill f16=[temp1],32 + ldf.fill f17=[temp2],32 + ;; + ldf.fill f18=[temp1],32 + ldf.fill f19=[temp2],32 + ;; + ldf.fill f20=[temp1],32 + ldf.fill f21=[temp2],32 + ;; + ldf.fill f22=[temp1],32 + ldf.fill f23=[temp2],32 + ;; + ldf.fill f24=[temp1],32 + ldf.fill f25=[temp2],32 + ;; + ldf.fill f26=[temp1],32 + ldf.fill f27=[temp2],32 + ;; + ldf.fill f28=[temp1],32 + ldf.fill f29=[temp2],32 + ;; + ldf.fill f30=[temp1],SW(B2)-SW(F30) + ldf.fill f31=[temp2],SW(B3)-SW(F31) + ;; + ld8 temp3=[temp1],16 // restore b2 + ld8 temp4=[temp2],16 // restore b3 + ;; + mov b2=temp3 + mov b3=temp4 + ld8 temp3=[temp1],SW(AR_LC)-SW(B4) // restore b4 + ld8 temp4=[temp2] // restore b5 + ;; + mov b4=temp3 + mov b5=temp4 + ld8 temp3=[temp1] // restore ar.lc + ;; + mov ar.lc=temp3 + + // Restore the pt_regs data that is not in minstate. The previous code + // left regs at switch_stack. + add regs=MCA_PT_REGS_OFFSET-MCA_SWITCH_STACK_OFFSET, regs + ;; + add temp1=PT(B6), regs + add temp2=PT(B7), regs + ;; + ld8 temp3=[temp1],PT(AR_CSD)-PT(B6) // restore b6 + ld8 temp4=[temp2],PT(AR_SSD)-PT(B7) // restore b7 + ;; + mov b6=temp3 + mov b7=temp4 + ld8 temp3=[temp1],PT(AR_UNAT)-PT(AR_CSD) // restore ar.csd + ld8 temp4=[temp2],PT(AR_PFS)-PT(AR_SSD) // restore ar.ssd + ;; + mov ar.csd=temp3 + mov ar.ssd=temp4 + ld8 temp3=[temp1] // restore ar.unat + add temp1=PT(AR_CCV)-PT(AR_UNAT), temp1 + ld8 temp4=[temp2],PT(AR_FPSR)-PT(AR_PFS) // restore ar.pfs + ;; + mov ar.unat=temp3 + mov ar.pfs=temp4 + // ar.rnat, ar.bspstore, loadrs are restore in ia64_old_stack. + ld8 temp3=[temp1],PT(F6)-PT(AR_CCV) // restore ar.ccv + ld8 temp4=[temp2],PT(F7)-PT(AR_FPSR) // restore ar.fpsr + ;; + mov ar.ccv=temp3 + mov ar.fpsr=temp4 + ldf.fill f6=[temp1],PT(F8)-PT(F6) + ldf.fill f7=[temp2],PT(F9)-PT(F7) + ;; + ldf.fill f8=[temp1],PT(F10)-PT(F8) + ldf.fill f9=[temp2],PT(F11)-PT(F9) + ;; + ldf.fill f10=[temp1] + ldf.fill f11=[temp2] + + // Restore the SAL to OS state. The previous code left regs at pt_regs. + add regs=MCA_SOS_OFFSET-MCA_PT_REGS_OFFSET, regs + ;; + add temp1=SOS(SAL_RA), regs + add temp2=SOS(SAL_GP), regs + ;; + ld8 r12=[temp1],16 // sal_ra + ld8 r9=[temp2],16 // sal_gp + ;; + ld8 r22=[temp1],16 // pal_min_state, virtual + ld8 r13=[temp2],16 // prev_IA64_KR_CURRENT + ;; + ld8 r16=[temp1],16 // prev_IA64_KR_CURRENT_STACK + ld8 r20=[temp2],16 // prev_task + ;; + ld8 temp3=[temp1],16 // cr.isr + ld8 temp4=[temp2],16 // cr.ifa + ;; + mov cr.isr=temp3 + mov cr.ifa=temp4 + ld8 temp3=[temp1],16 // cr.itir + ld8 temp4=[temp2],16 // cr.iipa + ;; + mov cr.itir=temp3 + mov cr.iipa=temp4 + ld8 temp3=[temp1] // cr.iim + ld8 temp4=[temp2] // cr.iha + add temp1=SOS(OS_STATUS), regs + add temp2=SOS(CONTEXT), regs + ;; + mov cr.iim=temp3 + mov cr.iha=temp4 + dep r22=0,r22,62,1 // pal_min_state, physical, uncached + mov IA64_KR(CURRENT)=r13 + ld8 r8=[temp1] // os_status + ld8 r10=[temp2] // context + + /* Wire IA64_TR_CURRENT_STACK to the stack that we are resuming to. To + * avoid any dependencies on the algorithm in ia64_switch_to(), just + * purge any existing CURRENT_STACK mapping and insert the new one. + * + * r16 contains prev_IA64_KR_CURRENT_STACK, r13 contains + * prev_IA64_KR_CURRENT, these values may have been changed by the C + * code. Do not use r8, r9, r10, r22, they contain values ready for + * the return to SAL. + */ + + mov r15=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r15=r15,IA64_GRANULE_SHIFT + ;; + dep r15=-1,r15,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r15,r18 + ;; + srlz.d + + extr.u r19=r13,61,3 // r13 = prev_IA64_KR_CURRENT + shl r20=r16,IA64_GRANULE_SHIFT // r16 = prev_IA64_KR_CURRENT_STACK + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + cmp.ne p6,p0=RGN_KERNEL,r19 // new stack is in the kernel region? + or r21=r20,r21 // construct PA | page properties +(p6) br.spnt 1f // the dreaded cpu 0 idle task in region 5:( + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d +1: + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_new_stack() +// +// Stub Description: +// +// Switch to the MCA/INIT stack. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// On entry RBS is still on the original stack, this routine switches RBS +// to use the MCA/INIT stack. +// +// On entry, sos->pal_min_state is physical, on exit it is virtual. +// +//-- + +ia64_new_stack: + add regs=MCA_PT_REGS_OFFSET, r3 + add temp2=MCA_SOS_OFFSET+SOS(PAL_MIN_STATE), r3 + mov b0=r2 // save return address + GET_IA64_MCA_DATA(temp1) + invala + ;; + add temp2=temp2, temp1 // struct ia64_sal_os_state.pal_min_state on MCA or INIT stack + add regs=regs, temp1 // struct pt_regs on MCA or INIT stack + ;; + // Address of minstate area provided by PAL is physical, uncacheable. + // Convert to Linux virtual address in region 6 for C code. + ld8 ms=[temp2] // pal_min_state, physical + ;; + dep temp1=-1,ms,62,2 // set region 6 + mov temp3=IA64_RBS_OFFSET-MCA_PT_REGS_OFFSET + ;; + st8 [temp2]=temp1 // pal_min_state, virtual + + add temp4=temp3, regs // start of bspstore on new stack + ;; + mov ar.bspstore=temp4 // switch RBS to MCA/INIT stack + ;; + flushrs // must be first in group + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_old_stack() +// +// Stub Description: +// +// Switch to the old stack. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +// On entry, pal_min_state is virtual, on exit it is physical. +// +// On entry RBS is on the MCA/INIT stack, this routine switches RBS +// back to the previous stack. +// +// The psr is set to all zeroes. SAL return requires either all zeroes or +// just psr.mc set. Leaving psr.mc off allows INIT to be issued if this +// code does not perform correctly. +// +// The dirty registers at the time of the event were flushed to the +// MCA/INIT stack in ia64_pt_regs_save(). Restore the dirty registers +// before reverting to the previous bspstore. +//-- + +ia64_old_stack: + add regs=MCA_PT_REGS_OFFSET, r3 + mov b0=r2 // save return address + GET_IA64_MCA_DATA(temp2) + LOAD_PHYSICAL(p0,temp1,1f) + ;; + mov cr.ipsr=r0 + mov cr.ifs=r0 + mov cr.iip=temp1 + ;; + invala + rfi +1: + + add regs=regs, temp2 // struct pt_regs on MCA or INIT stack + ;; + add temp1=PT(LOADRS), regs + ;; + ld8 temp2=[temp1],PT(AR_BSPSTORE)-PT(LOADRS) // restore loadrs + ;; + ld8 temp3=[temp1],PT(AR_RNAT)-PT(AR_BSPSTORE) // restore ar.bspstore + mov ar.rsc=temp2 + ;; + loadrs + ld8 temp4=[temp1] // restore ar.rnat + ;; + mov ar.bspstore=temp3 // back to old stack + ;; + mov ar.rnat=temp4 + ;; + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + + +//++ +// Name: +// ia64_set_kernel_registers() +// +// Stub Description: +// +// Set the registers that are required by the C code in order to run on an +// MCA/INIT stack. +// +// r2 contains the return address, r3 contains either +// IA64_MCA_CPU_MCA_STACK_OFFSET or IA64_MCA_CPU_INIT_STACK_OFFSET. +// +//-- + +ia64_set_kernel_registers: + add temp3=MCA_SP_OFFSET, r3 + mov b0=r2 // save return address + GET_IA64_MCA_DATA(temp1) + ;; + add r12=temp1, temp3 // kernel stack pointer on MCA/INIT stack + add r13=temp1, r3 // set current to start of MCA/INIT stack + add r20=temp1, r3 // physical start of MCA/INIT stack + ;; + DATA_PA_TO_VA(r12,temp2) + DATA_PA_TO_VA(r13,temp3) + ;; + mov IA64_KR(CURRENT)=r13 + + /* Wire IA64_TR_CURRENT_STACK to the MCA/INIT handler stack. To avoid + * any dependencies on the algorithm in ia64_switch_to(), just purge + * any existing CURRENT_STACK mapping and insert the new one. + */ + + mov r16=IA64_KR(CURRENT_STACK) // physical granule mapped by IA64_TR_CURRENT_STACK + ;; + shl r16=r16,IA64_GRANULE_SHIFT + ;; + dep r16=-1,r16,61,3 // virtual granule + mov r18=IA64_GRANULE_SHIFT<<2 // for cr.itir.ps + ;; + ptr.d r16,r18 + ;; + srlz.d + + shr.u r16=r20,IA64_GRANULE_SHIFT // r20 = physical start of MCA/INIT stack + movl r21=PAGE_KERNEL // page properties + ;; + mov IA64_KR(CURRENT_STACK)=r16 + or r21=r20,r21 // construct PA | page properties + ;; + mov cr.itir=r18 + mov cr.ifa=r13 + mov r20=IA64_TR_CURRENT_STACK + + movl r17=FPSR_DEFAULT + ;; + mov.m ar.fpsr=r17 // set ar.fpsr to kernel default value + ;; + itr.d dtr[r20]=r21 + ;; + srlz.d + + br.sptk b0 + +//EndStub////////////////////////////////////////////////////////////////////// + +#undef ms +#undef regs +#undef temp1 +#undef temp2 +#undef temp3 +#undef temp4 + + +// Support function for mca.c, it is here to avoid using inline asm. Given the +// address of an rnat slot, if that address is below the current ar.bspstore +// then return the contents of that slot, otherwise return the contents of +// ar.rnat. +GLOBAL_ENTRY(ia64_get_rnat) + alloc r14=ar.pfs,1,0,0,0 + mov ar.rsc=0 + ;; + mov r14=ar.bspstore + ;; + cmp.lt p6,p7=in0,r14 + ;; +(p6) ld8 r8=[in0] +(p7) mov r8=ar.rnat + mov ar.rsc=3 + br.ret.sptk.many rp +END(ia64_get_rnat) + + +// void ia64_set_psr_mc(void) +// +// Set psr.mc bit to mask MCA/INIT. +GLOBAL_ENTRY(ia64_set_psr_mc) + rsm psr.i | psr.ic // disable interrupts + ;; + srlz.d + ;; + mov r14 = psr // get psr{36:35,31:0} + movl r15 = 1f + ;; + dep r14 = -1, r14, PSR_MC, 1 // set psr.mc + ;; + dep r14 = -1, r14, PSR_IC, 1 // set psr.ic + ;; + dep r14 = -1, r14, PSR_BN, 1 // keep bank1 in use + ;; + mov cr.ipsr = r14 + mov cr.ifs = r0 + mov cr.iip = r15 + ;; + rfi +1: + br.ret.sptk.many rp +END(ia64_set_psr_mc) diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c new file mode 100644 index 000000000000..23c203639a96 --- /dev/null +++ b/arch/ia64/kernel/mca_drv.c @@ -0,0 +1,796 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * File: mca_drv.c + * Purpose: Generic MCA handling layer + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) 2004 Hidetoshi Seto + * Copyright (C) 2005 Silicon Graphics, Inc + * Copyright (C) 2005 Keith Owens + * Copyright (C) 2006 Russ Anderson + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include + +#include "mca_drv.h" + +/* max size of SAL error record (default) */ +static int sal_rec_max = 10000; + +/* from mca_drv_asm.S */ +extern void *mca_handler_bhhook(void); + +static DEFINE_SPINLOCK(mca_bh_lock); + +typedef enum { + MCA_IS_LOCAL = 0, + MCA_IS_GLOBAL = 1 +} mca_type_t; + +#define MAX_PAGE_ISOLATE 1024 + +static struct page *page_isolate[MAX_PAGE_ISOLATE]; +static int num_page_isolate = 0; + +typedef enum { + ISOLATE_NG, + ISOLATE_OK, + ISOLATE_NONE +} isolate_status_t; + +typedef enum { + MCA_NOT_RECOVERED = 0, + MCA_RECOVERED = 1 +} recovery_status_t; + +/* + * This pool keeps pointers to the section part of SAL error record + */ +static struct { + slidx_list_t *buffer; /* section pointer list pool */ + int cur_idx; /* Current index of section pointer list pool */ + int max_idx; /* Maximum index of section pointer list pool */ +} slidx_pool; + +static int +fatal_mca(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_ALERT "MCA: %s\n", buf); + + return MCA_NOT_RECOVERED; +} + +static int +mca_recovered(const char *fmt, ...) +{ + va_list args; + char buf[256]; + + va_start(args, fmt); + vsnprintf(buf, sizeof(buf), fmt, args); + va_end(args); + ia64_mca_printk(KERN_INFO "MCA: %s\n", buf); + + return MCA_RECOVERED; +} + +/** + * mca_page_isolate - isolate a poisoned page in order not to use it later + * @paddr: poisoned memory location + * + * Return value: + * one of isolate_status_t, ISOLATE_OK/NG/NONE. + */ + +static isolate_status_t +mca_page_isolate(unsigned long paddr) +{ + int i; + struct page *p; + + /* whether physical address is valid or not */ + if (!ia64_phys_addr_valid(paddr)) + return ISOLATE_NONE; + + if (!pfn_valid(paddr >> PAGE_SHIFT)) + return ISOLATE_NONE; + + /* convert physical address to physical page number */ + p = pfn_to_page(paddr>>PAGE_SHIFT); + + /* check whether a page number have been already registered or not */ + for (i = 0; i < num_page_isolate; i++) + if (page_isolate[i] == p) + return ISOLATE_OK; /* already listed */ + + /* limitation check */ + if (num_page_isolate == MAX_PAGE_ISOLATE) + return ISOLATE_NG; + + /* kick pages having attribute 'SLAB' or 'Reserved' */ + if (PageSlab(p) || PageReserved(p)) + return ISOLATE_NG; + + /* add attribute 'Reserved' and register the page */ + get_page(p); + SetPageReserved(p); + page_isolate[num_page_isolate++] = p; + + return ISOLATE_OK; +} + +/** + * mca_hanlder_bh - Kill the process which occurred memory read error + * @paddr: poisoned address received from MCA Handler + */ + +void +mca_handler_bh(unsigned long paddr, void *iip, unsigned long ipsr) +{ + ia64_mlogbuf_dump(); + printk(KERN_ERR "OS_MCA: process [cpu %d, pid: %d, uid: %d, " + "iip: %p, psr: 0x%lx,paddr: 0x%lx](%s) encounters MCA.\n", + raw_smp_processor_id(), current->pid, + from_kuid(&init_user_ns, current_uid()), + iip, ipsr, paddr, current->comm); + + spin_lock(&mca_bh_lock); + switch (mca_page_isolate(paddr)) { + case ISOLATE_OK: + printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); + break; + case ISOLATE_NG: + printk(KERN_CRIT "Page isolation: ( %lx ) failure.\n", paddr); + break; + default: + break; + } + spin_unlock(&mca_bh_lock); + + /* This process is about to be killed itself */ + make_task_dead(SIGKILL); +} + +/** + * mca_make_peidx - Make index of processor error section + * @slpi: pointer to record of processor error section + * @peidx: pointer to index of processor error section + */ + +static void +mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) +{ + /* + * calculate the start address of + * "struct cpuid_info" and "sal_processor_static_info_t". + */ + u64 total_check_num = slpi->valid.num_cache_check + + slpi->valid.num_tlb_check + + slpi->valid.num_bus_check + + slpi->valid.num_reg_file_check + + slpi->valid.num_ms_check; + u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num + + sizeof(sal_log_processor_info_t); + u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); + + peidx_head(peidx) = slpi; + peidx_mid(peidx) = (struct sal_cpuid_info *) + (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); + peidx_bottom(peidx) = (sal_processor_static_info_t *) + (slpi->valid.psi_static_struct ? + ((char*)slpi + head_size + mid_size) : NULL); +} + +/** + * mca_make_slidx - Make index of SAL error record + * @buffer: pointer to SAL error record + * @slidx: pointer to index of SAL error record + * + * Return value: + * 1 if record has platform error / 0 if not + */ +#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \ + {slidx_list_t *hl = &slidx_pool.buffer[slidx_pool.cur_idx]; \ + hl->hdr = ptr; \ + list_add(&hl->list, &(sect)); \ + slidx_pool.cur_idx = (slidx_pool.cur_idx + 1)%slidx_pool.max_idx; } + +static int +mca_make_slidx(void *buffer, slidx_table_t *slidx) +{ + int platform_err = 0; + int record_len = ((sal_log_record_header_t*)buffer)->len; + u32 ercd_pos; + int sects; + sal_log_section_hdr_t *sp; + + /* + * Initialize index referring current record + */ + INIT_LIST_HEAD(&(slidx->proc_err)); + INIT_LIST_HEAD(&(slidx->mem_dev_err)); + INIT_LIST_HEAD(&(slidx->sel_dev_err)); + INIT_LIST_HEAD(&(slidx->pci_bus_err)); + INIT_LIST_HEAD(&(slidx->smbios_dev_err)); + INIT_LIST_HEAD(&(slidx->pci_comp_err)); + INIT_LIST_HEAD(&(slidx->plat_specific_err)); + INIT_LIST_HEAD(&(slidx->host_ctlr_err)); + INIT_LIST_HEAD(&(slidx->plat_bus_err)); + INIT_LIST_HEAD(&(slidx->unsupported)); + + /* + * Extract a Record Header + */ + slidx->header = buffer; + + /* + * Extract each section records + * (arranged from "int ia64_log_platform_info_print()") + */ + for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; + ercd_pos < record_len; ercd_pos += sp->len, sects++) { + sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); + if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { + LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); + } else if (!efi_guidcmp(sp->guid, + SAL_PLAT_BUS_ERR_SECT_GUID)) { + platform_err = 1; + LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); + } else { + LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); + } + } + slidx->n_sections = sects; + + return platform_err; +} + +/** + * init_record_index_pools - Initialize pool of lists for SAL record index + * + * Return value: + * 0 on Success / -ENOMEM on Failure + */ +static int +init_record_index_pools(void) +{ + int i; + int rec_max_size; /* Maximum size of SAL error records */ + int sect_min_size; /* Minimum size of SAL error sections */ + /* minimum size table of each section */ + static int sal_log_sect_min_sizes[] = { + sizeof(sal_log_processor_info_t) + + sizeof(sal_processor_static_info_t), + sizeof(sal_log_mem_dev_err_info_t), + sizeof(sal_log_sel_dev_err_info_t), + sizeof(sal_log_pci_bus_err_info_t), + sizeof(sal_log_smbios_dev_err_info_t), + sizeof(sal_log_pci_comp_err_info_t), + sizeof(sal_log_plat_specific_err_info_t), + sizeof(sal_log_host_ctlr_err_info_t), + sizeof(sal_log_plat_bus_err_info_t), + }; + + /* + * MCA handler cannot allocate new memory on flight, + * so we preallocate enough memory to handle a SAL record. + * + * Initialize a handling set of slidx_pool: + * 1. Pick up the max size of SAL error records + * 2. Pick up the min size of SAL error sections + * 3. Allocate the pool as enough to 2 SAL records + * (now we can estimate the maxinum of section in a record.) + */ + + /* - 1 - */ + rec_max_size = sal_rec_max; + + /* - 2 - */ + sect_min_size = sal_log_sect_min_sizes[0]; + for (i = 1; i < ARRAY_SIZE(sal_log_sect_min_sizes); i++) + if (sect_min_size > sal_log_sect_min_sizes[i]) + sect_min_size = sal_log_sect_min_sizes[i]; + + /* - 3 - */ + slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; + slidx_pool.buffer = + kmalloc_array(slidx_pool.max_idx, sizeof(slidx_list_t), + GFP_KERNEL); + + return slidx_pool.buffer ? 0 : -ENOMEM; +} + + +/***************************************************************************** + * Recovery functions * + *****************************************************************************/ + +/** + * is_mca_global - Check whether this MCA is global or not + * @peidx: pointer of index of processor error section + * @pbci: pointer to pal_bus_check_info_t + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * MCA_IS_LOCAL / MCA_IS_GLOBAL + */ + +static mca_type_t +is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci, + struct ia64_sal_os_state *sos) +{ + pal_processor_state_info_t *psp = + (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * PAL can request a rendezvous, if the MCA has a global scope. + * If "rz_always" flag is set, SAL requests MCA rendezvous + * in spite of global MCA. + * Therefore it is local MCA when rendezvous has not been requested. + * Failed to rendezvous, the system must be down. + */ + switch (sos->rv_rc) { + case -1: /* SAL rendezvous unsuccessful */ + return MCA_IS_GLOBAL; + case 0: /* SAL rendezvous not required */ + return MCA_IS_LOCAL; + case 1: /* SAL rendezvous successful int */ + case 2: /* SAL rendezvous successful int with init */ + default: + break; + } + + /* + * If One or more Cache/TLB/Reg_File/Uarch_Check is here, + * it would be a local MCA. (i.e. processor internal error) + */ + if (psp->tc || psp->cc || psp->rc || psp->uc) + return MCA_IS_LOCAL; + + /* + * Bus_Check structure with Bus_Check.ib (internal bus error) flag set + * would be a global MCA. (e.g. a system bus address parity error) + */ + if (!pbci || pbci->ib) + return MCA_IS_GLOBAL; + + /* + * Bus_Check structure with Bus_Check.eb (external bus error) flag set + * could be either a local MCA or a global MCA. + * + * Referring Bus_Check.bsi: + * 0: Unknown/unclassified + * 1: BERR# + * 2: BINIT# + * 3: Hard Fail + * (FIXME: Are these SGI specific or generic bsi values?) + */ + if (pbci->eb) + switch (pbci->bsi) { + case 0: + /* e.g. a load from poisoned memory */ + return MCA_IS_LOCAL; + case 1: + case 2: + case 3: + return MCA_IS_GLOBAL; + } + + return MCA_IS_GLOBAL; +} + +/** + * get_target_identifier - Get the valid Cache or Bus check target identifier. + * @peidx: pointer of index of processor error section + * + * Return value: + * target address on Success / 0 on Failure + */ +static u64 +get_target_identifier(peidx_table_t *peidx) +{ + u64 target_address = 0; + sal_log_mod_error_info_t *smei; + pal_cache_check_info_t *pcci; + int i, level = 9; + + /* + * Look through the cache checks for a valid target identifier + * If more than one valid target identifier, return the one + * with the lowest cache level. + */ + for (i = 0; i < peidx_cache_check_num(peidx); i++) { + smei = (sal_log_mod_error_info_t *)peidx_cache_check(peidx, i); + if (smei->valid.target_identifier && smei->target_identifier) { + pcci = (pal_cache_check_info_t *)&(smei->check_info); + if (!target_address || (pcci->level < level)) { + target_address = smei->target_identifier; + level = pcci->level; + continue; + } + } + } + if (target_address) + return target_address; + + /* + * Look at the bus check for a valid target identifier + */ + smei = peidx_bus_check(peidx, 0); + if (smei && smei->valid.target_identifier) + return smei->target_identifier; + + return 0; +} + +/** + * recover_from_read_error - Try to recover the errors which type are "read"s. + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_read_error(slidx_table_t *slidx, + peidx_table_t *peidx, pal_bus_check_info_t *pbci, + struct ia64_sal_os_state *sos) +{ + u64 target_identifier; + struct pal_min_state_area *pmsa; + struct ia64_psr *psr1, *psr2; + ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; + + /* Is target address valid? */ + target_identifier = get_target_identifier(peidx); + if (!target_identifier) + return fatal_mca("target address not valid"); + + /* + * cpu read or memory-mapped io read + * + * offending process affected process OS MCA do + * kernel mode kernel mode down system + * kernel mode user mode kill the process + * user mode kernel mode down system (*) + * user mode user mode kill the process + * + * (*) You could terminate offending user-mode process + * if (pbci->pv && pbci->pl != 0) *and* if you sure + * the process not have any locks of kernel. + */ + + /* Is minstate valid? */ + if (!peidx_bottom(peidx) || !(peidx_bottom(peidx)->valid.minstate)) + return fatal_mca("minstate not valid"); + psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); + psr2 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_xpsr); + + /* + * Check the privilege level of interrupted context. + * If it is user-mode, then terminate affected process. + */ + + pmsa = sos->pal_min_state; + if (psr1->cpl != 0 || + ((psr2->cpl != 0) && mca_recover_range(pmsa->pmsa_iip))) { + /* + * setup for resume to bottom half of MCA, + * "mca_handler_bhhook" + */ + /* pass to bhhook as argument (gr8, ...) */ + pmsa->pmsa_gr[8-1] = target_identifier; + pmsa->pmsa_gr[9-1] = pmsa->pmsa_iip; + pmsa->pmsa_gr[10-1] = pmsa->pmsa_ipsr; + /* set interrupted return address (but no use) */ + pmsa->pmsa_br0 = pmsa->pmsa_iip; + /* change resume address to bottom half */ + pmsa->pmsa_iip = mca_hdlr_bh->fp; + pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; + /* set cpl with kernel mode */ + psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; + psr2->cpl = 0; + psr2->ri = 0; + psr2->bn = 1; + psr2->i = 0; + + return mca_recovered("user memory corruption. " + "kill affected process - recovered."); + } + + return fatal_mca("kernel context not recovered, iip 0x%lx\n", + pmsa->pmsa_iip); +} + +/** + * recover_from_platform_error - Recover from platform error. + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, + pal_bus_check_info_t *pbci, + struct ia64_sal_os_state *sos) +{ + int status = 0; + pal_processor_state_info_t *psp = + (pal_processor_state_info_t*)peidx_psp(peidx); + + if (psp->bc && pbci->eb && pbci->bsi == 0) { + switch(pbci->type) { + case 1: /* partial read */ + case 3: /* full line(cpu) read */ + case 9: /* I/O space read */ + status = recover_from_read_error(slidx, peidx, pbci, + sos); + break; + case 0: /* unknown */ + case 2: /* partial write */ + case 4: /* full line write */ + case 5: /* implicit or explicit write-back operation */ + case 6: /* snoop probe */ + case 7: /* incoming or outgoing ptc.g */ + case 8: /* write coalescing transactions */ + case 10: /* I/O space write */ + case 11: /* inter-processor interrupt message(IPI) */ + case 12: /* interrupt acknowledge or + external task priority cycle */ + default: + break; + } + } else if (psp->cc && !psp->bc) { /* Cache error */ + status = recover_from_read_error(slidx, peidx, pbci, sos); + } + + return status; +} + +/* + * recover_from_tlb_check + * @peidx: pointer of index of processor error section + * + * Return value: + * 1 on Success / 0 on Failure + */ +static int +recover_from_tlb_check(peidx_table_t *peidx) +{ + sal_log_mod_error_info_t *smei; + pal_tlb_check_info_t *ptci; + + smei = (sal_log_mod_error_info_t *)peidx_tlb_check(peidx, 0); + ptci = (pal_tlb_check_info_t *)&(smei->check_info); + + /* + * Look for signature of a duplicate TLB DTC entry, which is + * a SW bug and always fatal. + */ + if (ptci->op == PAL_TLB_CHECK_OP_PURGE + && !(ptci->itr || ptci->dtc || ptci->itc)) + return fatal_mca("Duplicate TLB entry"); + + return mca_recovered("TLB check recovered"); +} + +/** + * recover_from_processor_error + * @platform: whether there are some platform error section or not + * @slidx: pointer of index of SAL error record + * @peidx: pointer of index of processor error section + * @pbci: pointer of pal_bus_check_info + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +recover_from_processor_error(int platform, slidx_table_t *slidx, + peidx_table_t *peidx, pal_bus_check_info_t *pbci, + struct ia64_sal_os_state *sos) +{ + pal_processor_state_info_t *psp = + (pal_processor_state_info_t*)peidx_psp(peidx); + + /* + * Processor recovery status must key off of the PAL recovery + * status in the Processor State Parameter. + */ + + /* + * The machine check is corrected. + */ + if (psp->cm == 1) + return mca_recovered("machine check is already corrected."); + + /* + * The error was not contained. Software must be reset. + */ + if (psp->us || psp->ci == 0) + return fatal_mca("error not contained"); + + /* + * Look for recoverable TLB check + */ + if (psp->tc && !(psp->cc || psp->bc || psp->rc || psp->uc)) + return recover_from_tlb_check(peidx); + + /* + * The cache check and bus check bits have four possible states + * cc bc + * 1 1 Memory error, attempt recovery + * 1 0 Cache error, attempt recovery + * 0 1 I/O error, attempt recovery + * 0 0 Other error type, not recovered + */ + if (psp->cc == 0 && (psp->bc == 0 || pbci == NULL)) + return fatal_mca("No cache or bus check"); + + /* + * Cannot handle more than one bus check. + */ + if (peidx_bus_check_num(peidx) > 1) + return fatal_mca("Too many bus checks"); + + if (pbci->ib) + return fatal_mca("Internal Bus error"); + if (pbci->eb && pbci->bsi > 0) + return fatal_mca("External bus check fatal status"); + + /* + * This is a local MCA and estimated as a recoverable error. + */ + if (platform) + return recover_from_platform_error(slidx, peidx, pbci, sos); + + /* + * On account of strange SAL error record, we cannot recover. + */ + return fatal_mca("Strange SAL record"); +} + +/** + * mca_try_to_recover - Try to recover from MCA + * @rec: pointer to a SAL error record + * @sos: pointer to hand off struct between SAL and OS + * + * Return value: + * 1 on Success / 0 on Failure + */ + +static int +mca_try_to_recover(void *rec, struct ia64_sal_os_state *sos) +{ + int platform_err; + int n_proc_err; + slidx_table_t slidx; + peidx_table_t peidx; + pal_bus_check_info_t pbci; + + /* Make index of SAL error record */ + platform_err = mca_make_slidx(rec, &slidx); + + /* Count processor error sections */ + n_proc_err = slidx_count(&slidx, proc_err); + + /* Now, OS can recover when there is one processor error section */ + if (n_proc_err > 1) + return fatal_mca("Too Many Errors"); + else if (n_proc_err == 0) + /* Weird SAL record ... We can't do anything */ + return fatal_mca("Weird SAL record"); + + /* Make index of processor error section */ + mca_make_peidx((sal_log_processor_info_t*) + slidx_first_entry(&slidx.proc_err)->hdr, &peidx); + + /* Extract Processor BUS_CHECK[0] */ + *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); + + /* Check whether MCA is global or not */ + if (is_mca_global(&peidx, &pbci, sos)) + return fatal_mca("global MCA"); + + /* Try to recover a processor error */ + return recover_from_processor_error(platform_err, &slidx, &peidx, + &pbci, sos); +} + +/* + * ============================================================================= + */ + +int __init mca_external_handler_init(void) +{ + if (init_record_index_pools()) + return -ENOMEM; + + /* register external mca handlers */ + if (ia64_reg_MCA_extension(mca_try_to_recover)) { + printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); + kfree(slidx_pool.buffer); + return -EFAULT; + } + return 0; +} + +void __exit mca_external_handler_exit(void) +{ + /* unregister external mca handlers */ + ia64_unreg_MCA_extension(); + kfree(slidx_pool.buffer); +} + +module_init(mca_external_handler_init); +module_exit(mca_external_handler_exit); + +module_param(sal_rec_max, int, 0644); +MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); + +MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); +MODULE_LICENSE("GPL"); diff --git a/arch/ia64/kernel/mca_drv.h b/arch/ia64/kernel/mca_drv.h new file mode 100644 index 000000000000..45bc4e3ae14f --- /dev/null +++ b/arch/ia64/kernel/mca_drv.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * File: mca_drv.h + * Purpose: Define helpers for Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) 2004 Hidetoshi Seto + */ +/* + * Processor error section: + * + * +-sal_log_processor_info_t *info-------------+ + * | sal_log_section_hdr_t header; | + * | ... | + * | sal_log_mod_error_info_t info[0]; | + * +-+----------------+-------------------------+ + * | CACHE_CHECK | ^ num_cache_check v + * +----------------+ + * | TLB_CHECK | ^ num_tlb_check v + * +----------------+ + * | BUS_CHECK | ^ num_bus_check v + * +----------------+ + * | REG_FILE_CHECK | ^ num_reg_file_check v + * +----------------+ + * | MS_CHECK | ^ num_ms_check v + * +-struct cpuid_info *id----------------------+ + * | regs[5]; | + * | reserved; | + * +-sal_processor_static_info_t *regs----------+ + * | valid; | + * | ... | + * | fr[128]; | + * +--------------------------------------------+ + */ + +/* peidx: index of processor error section */ +typedef struct peidx_table { + sal_log_processor_info_t *info; + struct sal_cpuid_info *id; + sal_processor_static_info_t *regs; +} peidx_table_t; + +#define peidx_head(p) (((p)->info)) +#define peidx_mid(p) (((p)->id)) +#define peidx_bottom(p) (((p)->regs)) + +#define peidx_psp(p) (&(peidx_head(p)->proc_state_parameter)) +#define peidx_field_valid(p) (&(peidx_head(p)->valid)) +#define peidx_minstate_area(p) (&(peidx_bottom(p)->min_state_area)) + +#define peidx_cache_check_num(p) (peidx_head(p)->valid.num_cache_check) +#define peidx_tlb_check_num(p) (peidx_head(p)->valid.num_tlb_check) +#define peidx_bus_check_num(p) (peidx_head(p)->valid.num_bus_check) +#define peidx_reg_file_check_num(p) (peidx_head(p)->valid.num_reg_file_check) +#define peidx_ms_check_num(p) (peidx_head(p)->valid.num_ms_check) + +#define peidx_cache_check_idx(p, n) (n) +#define peidx_tlb_check_idx(p, n) (peidx_cache_check_idx(p, peidx_cache_check_num(p)) + n) +#define peidx_bus_check_idx(p, n) (peidx_tlb_check_idx(p, peidx_tlb_check_num(p)) + n) +#define peidx_reg_file_check_idx(p, n) (peidx_bus_check_idx(p, peidx_bus_check_num(p)) + n) +#define peidx_ms_check_idx(p, n) (peidx_reg_file_check_idx(p, peidx_reg_file_check_num(p)) + n) + +#define peidx_mod_error_info(p, name, n) \ +({ int __idx = peidx_##name##_idx(p, n); \ + sal_log_mod_error_info_t *__ret = NULL; \ + if (peidx_##name##_num(p) > n) /*BUG*/ \ + __ret = &(peidx_head(p)->info[__idx]); \ + __ret; }) + +#define peidx_cache_check(p, n) peidx_mod_error_info(p, cache_check, n) +#define peidx_tlb_check(p, n) peidx_mod_error_info(p, tlb_check, n) +#define peidx_bus_check(p, n) peidx_mod_error_info(p, bus_check, n) +#define peidx_reg_file_check(p, n) peidx_mod_error_info(p, reg_file_check, n) +#define peidx_ms_check(p, n) peidx_mod_error_info(p, ms_check, n) + +#define peidx_check_info(proc, name, n) \ +({ \ + sal_log_mod_error_info_t *__info = peidx_mod_error_info(proc, name, n);\ + u64 __temp = __info && __info->valid.check_info \ + ? __info->check_info : 0; \ + __temp; }) + +/* slidx: index of SAL log error record */ + +typedef struct slidx_list { + struct list_head list; + sal_log_section_hdr_t *hdr; +} slidx_list_t; + +typedef struct slidx_table { + sal_log_record_header_t *header; + int n_sections; /* # of section headers */ + struct list_head proc_err; + struct list_head mem_dev_err; + struct list_head sel_dev_err; + struct list_head pci_bus_err; + struct list_head smbios_dev_err; + struct list_head pci_comp_err; + struct list_head plat_specific_err; + struct list_head host_ctlr_err; + struct list_head plat_bus_err; + struct list_head unsupported; /* list of unsupported sections */ +} slidx_table_t; + +#define slidx_foreach_entry(pos, head) \ + list_for_each_entry(pos, head, list) +#define slidx_first_entry(head) \ + (((head)->next != (head)) ? list_entry((head)->next, typeof(slidx_list_t), list) : NULL) +#define slidx_count(slidx, sec) \ +({ int __count = 0; \ + slidx_list_t *__pos; \ + slidx_foreach_entry(__pos, &((slidx)->sec)) { __count++; }\ + __count; }) + +struct mca_table_entry { + int start_addr; /* location-relative starting address of MCA recoverable range */ + int end_addr; /* location-relative ending address of MCA recoverable range */ +}; + +extern const struct mca_table_entry *search_mca_tables (unsigned long addr); +extern int mca_recover_range(unsigned long); +extern void ia64_mlogbuf_dump(void); + diff --git a/arch/ia64/kernel/mca_drv_asm.S b/arch/ia64/kernel/mca_drv_asm.S new file mode 100644 index 000000000000..4428f57bee73 --- /dev/null +++ b/arch/ia64/kernel/mca_drv_asm.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * File: mca_drv_asm.S + * Purpose: Assembly portion of Generic MCA handling + * + * Copyright (C) 2004 FUJITSU LIMITED + * Copyright (C) 2004 Hidetoshi Seto + */ +#include + +#include +#include +#include + +GLOBAL_ENTRY(mca_handler_bhhook) + invala // clear RSE ? + cover + ;; + clrrrb + ;; + alloc r16=ar.pfs,0,2,3,0 // make a new frame + mov ar.rsc=0 + mov r13=IA64_KR(CURRENT) // current task pointer + ;; + mov r2=r13 + ;; + addl r22=IA64_RBS_OFFSET,r2 + ;; + mov ar.bspstore=r22 + addl sp=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r2 + ;; + adds r2=IA64_TASK_THREAD_ON_USTACK_OFFSET,r13 + ;; + st1 [r2]=r0 // clear current->thread.on_ustack flag + mov loc0=r16 + movl loc1=mca_handler_bh // recovery C function + ;; + mov out0=r8 // poisoned address + mov out1=r9 // iip + mov out2=r10 // psr + mov b6=loc1 + ;; + mov loc1=rp + ssm psr.ic + ;; + srlz.i + ;; + ssm psr.i + br.call.sptk.many rp=b6 // does not return ... + ;; + mov ar.pfs=loc0 + mov rp=loc1 + ;; + mov r8=r0 + br.ret.sptk.many rp +END(mca_handler_bhhook) diff --git a/arch/ia64/kernel/minstate.h b/arch/ia64/kernel/minstate.h new file mode 100644 index 000000000000..d6eab2a1084d --- /dev/null +++ b/arch/ia64/kernel/minstate.h @@ -0,0 +1,251 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include + +#include "entry.h" +#include + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE +/* read ar.itc in advance, and use it before leaving bank 0 */ +#define ACCOUNT_GET_STAMP \ +(pUStk) mov.m r20=ar.itc; +#define ACCOUNT_SYS_ENTER \ +(pUStk) br.call.spnt rp=account_sys_enter \ + ;; +#else +#define ACCOUNT_GET_STAMP +#define ACCOUNT_SYS_ENTER +#endif + +.section ".data..patch.rse", "a" +.previous + +/* + * DO_SAVE_MIN switches to the kernel stacks (if necessary) and saves + * the minimum state necessary that allows us to turn psr.ic back + * on. + * + * Assumed state upon entry: + * psr.ic: off + * r31: contains saved predicates (pr) + * + * Upon exit, the state is as follows: + * psr.ic: off + * r2 = points to &pt_regs.r16 + * r8 = contents of ar.ccv + * r9 = contents of ar.csd + * r10 = contents of ar.ssd + * r11 = FPSR_DEFAULT + * r12 = kernel sp (kernel virtual address) + * r13 = points to current task_struct (kernel virtual address) + * p15 = TRUE if psr.i is set in cr.ipsr + * predicate registers (other than p2, p3, and p15), b6, r3, r14, r15: + * preserved + * + * Note that psr.ic is NOT turned on by this macro. This is so that + * we can pass interruption state as arguments to a handler. + */ +#define IA64_NATIVE_DO_SAVE_MIN(__COVER,SAVE_IFS,EXTRA,WORKAROUND) \ + mov r16=IA64_KR(CURRENT); /* M */ \ + mov r27=ar.rsc; /* M */ \ + mov r20=r1; /* A */ \ + mov r25=ar.unat; /* M */ \ + MOV_FROM_IPSR(p0,r29); /* M */ \ + mov r26=ar.pfs; /* I */ \ + MOV_FROM_IIP(r28); /* M */ \ + mov r21=ar.fpsr; /* M */ \ + __COVER; /* B;; (or nothing) */ \ + ;; \ + adds r16=IA64_TASK_THREAD_ON_USTACK_OFFSET,r16; \ + ;; \ + ld1 r17=[r16]; /* load current->thread.on_ustack flag */ \ + st1 [r16]=r0; /* clear current->thread.on_ustack flag */ \ + adds r1=-IA64_TASK_THREAD_ON_USTACK_OFFSET,r16 \ + /* switch from user to kernel RBS: */ \ + ;; \ + invala; /* M */ \ + SAVE_IFS; \ + cmp.eq pKStk,pUStk=r0,r17; /* are we in kernel mode already? */ \ + ;; \ +(pUStk) mov ar.rsc=0; /* set enforced lazy mode, pl 0, little-endian, loadrs=0 */ \ + ;; \ +(pUStk) mov.m r24=ar.rnat; \ +(pUStk) addl r22=IA64_RBS_OFFSET,r1; /* compute base of RBS */ \ +(pKStk) mov r1=sp; /* get sp */ \ + ;; \ +(pUStk) lfetch.fault.excl.nt1 [r22]; \ +(pUStk) addl r1=IA64_STK_OFFSET-IA64_PT_REGS_SIZE,r1; /* compute base of memory stack */ \ +(pUStk) mov r23=ar.bspstore; /* save ar.bspstore */ \ + ;; \ +(pUStk) mov ar.bspstore=r22; /* switch to kernel RBS */ \ +(pKStk) addl r1=-IA64_PT_REGS_SIZE,r1; /* if in kernel mode, use sp (r12) */ \ + ;; \ +(pUStk) mov r18=ar.bsp; \ +(pUStk) mov ar.rsc=0x3; /* set eager mode, pl 0, little-endian, loadrs=0 */ \ + adds r17=2*L1_CACHE_BYTES,r1; /* really: biggest cache-line size */ \ + adds r16=PT(CR_IPSR),r1; \ + ;; \ + lfetch.fault.excl.nt1 [r17],L1_CACHE_BYTES; \ + st8 [r16]=r29; /* save cr.ipsr */ \ + ;; \ + lfetch.fault.excl.nt1 [r17]; \ + tbit.nz p15,p0=r29,IA64_PSR_I_BIT; \ + mov r29=b0 \ + ;; \ + WORKAROUND; \ + adds r16=PT(R8),r1; /* initialize first base pointer */ \ + adds r17=PT(R9),r1; /* initialize second base pointer */ \ +(pKStk) mov r18=r0; /* make sure r18 isn't NaT */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r8,16; \ +.mem.offset 8,0; st8.spill [r17]=r9,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r10,24; \ +.mem.offset 8,0; st8.spill [r17]=r11,24; \ + ;; \ + st8 [r16]=r28,16; /* save cr.iip */ \ + st8 [r17]=r30,16; /* save cr.ifs */ \ +(pUStk) sub r18=r18,r22; /* r18=RSE.ndirty*8 */ \ + mov r8=ar.ccv; \ + mov r9=ar.csd; \ + mov r10=ar.ssd; \ + movl r11=FPSR_DEFAULT; /* L-unit */ \ + ;; \ + st8 [r16]=r25,16; /* save ar.unat */ \ + st8 [r17]=r26,16; /* save ar.pfs */ \ + shl r18=r18,16; /* compute ar.rsc to be used for "loadrs" */ \ + ;; \ + st8 [r16]=r27,16; /* save ar.rsc */ \ +(pUStk) st8 [r17]=r24,16; /* save ar.rnat */ \ +(pKStk) adds r17=16,r17; /* skip over ar_rnat field */ \ + ;; /* avoid RAW on r16 & r17 */ \ +(pUStk) st8 [r16]=r23,16; /* save ar.bspstore */ \ + st8 [r17]=r31,16; /* save predicates */ \ +(pKStk) adds r16=16,r16; /* skip over ar_bspstore field */ \ + ;; \ + st8 [r16]=r29,16; /* save b0 */ \ + st8 [r17]=r18,16; /* save ar.rsc value for "loadrs" */ \ + cmp.eq pNonSys,pSys=r0,r0 /* initialize pSys=0, pNonSys=1 */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r20,16; /* save original r1 */ \ +.mem.offset 8,0; st8.spill [r17]=r12,16; \ + adds r12=-16,r1; /* switch to kernel memory stack (with 16 bytes of scratch) */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r13,16; \ +.mem.offset 8,0; st8.spill [r17]=r21,16; /* save ar.fpsr */ \ + mov r13=IA64_KR(CURRENT); /* establish `current' */ \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r15,16; \ +.mem.offset 8,0; st8.spill [r17]=r14,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r16]=r2,16; \ +.mem.offset 8,0; st8.spill [r17]=r3,16; \ + ACCOUNT_GET_STAMP \ + adds r2=IA64_PT_REGS_R16_OFFSET,r1; \ + ;; \ + EXTRA; \ + movl r1=__gp; /* establish kernel global pointer */ \ + ;; \ + ACCOUNT_SYS_ENTER \ + bsw.1; /* switch back to bank 1 (must be last in insn group) */ \ + ;; + +/* + * SAVE_REST saves the remainder of pt_regs (with psr.ic on). + * + * Assumed state upon entry: + * psr.ic: on + * r2: points to &pt_regs.r16 + * r3: points to &pt_regs.r17 + * r8: contents of ar.ccv + * r9: contents of ar.csd + * r10: contents of ar.ssd + * r11: FPSR_DEFAULT + * + * Registers r14 and r15 are guaranteed not to be touched by SAVE_REST. + */ +#define SAVE_REST \ +.mem.offset 0,0; st8.spill [r2]=r16,16; \ +.mem.offset 8,0; st8.spill [r3]=r17,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r18,16; \ +.mem.offset 8,0; st8.spill [r3]=r19,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r20,16; \ +.mem.offset 8,0; st8.spill [r3]=r21,16; \ + mov r18=b6; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r22,16; \ +.mem.offset 8,0; st8.spill [r3]=r23,16; \ + mov r19=b7; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r24,16; \ +.mem.offset 8,0; st8.spill [r3]=r25,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r26,16; \ +.mem.offset 8,0; st8.spill [r3]=r27,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r28,16; \ +.mem.offset 8,0; st8.spill [r3]=r29,16; \ + ;; \ +.mem.offset 0,0; st8.spill [r2]=r30,16; \ +.mem.offset 8,0; st8.spill [r3]=r31,32; \ + ;; \ + mov ar.fpsr=r11; /* M-unit */ \ + st8 [r2]=r8,8; /* ar.ccv */ \ + adds r24=PT(B6)-PT(F7),r3; \ + ;; \ + stf.spill [r2]=f6,32; \ + stf.spill [r3]=f7,32; \ + ;; \ + stf.spill [r2]=f8,32; \ + stf.spill [r3]=f9,32; \ + ;; \ + stf.spill [r2]=f10; \ + stf.spill [r3]=f11; \ + adds r25=PT(B7)-PT(F11),r3; \ + ;; \ + st8 [r24]=r18,16; /* b6 */ \ + st8 [r25]=r19,16; /* b7 */ \ + ;; \ + st8 [r24]=r9; /* ar.csd */ \ + st8 [r25]=r10; /* ar.ssd */ \ + ;; + +#define RSE_WORKAROUND \ +(pUStk) extr.u r17=r18,3,6; \ +(pUStk) sub r16=r18,r22; \ +[1:](pKStk) br.cond.sptk.many 1f; \ + .xdata4 ".data..patch.rse",1b-. \ + ;; \ + cmp.ge p6,p7 = 33,r17; \ + ;; \ +(p6) mov r17=0x310; \ +(p7) mov r17=0x308; \ + ;; \ + cmp.leu p1,p0=r16,r17; \ +(p1) br.cond.sptk.many 1f; \ + dep.z r17=r26,0,62; \ + movl r16=2f; \ + ;; \ + mov ar.pfs=r17; \ + dep r27=r0,r27,16,14; \ + mov b0=r16; \ + ;; \ + br.ret.sptk b0; \ + ;; \ +2: \ + mov ar.rsc=r0 \ + ;; \ + flushrs; \ + ;; \ + mov ar.bspstore=r22 \ + ;; \ + mov r18=ar.bsp; \ + ;; \ +1: \ + .pred.rel "mutex", pKStk, pUStk + +#define SAVE_MIN_WITH_COVER DO_SAVE_MIN(COVER, mov r30=cr.ifs, , RSE_WORKAROUND) +#define SAVE_MIN_WITH_COVER_R19 DO_SAVE_MIN(COVER, mov r30=cr.ifs, mov r15=r19, RSE_WORKAROUND) +#define SAVE_MIN DO_SAVE_MIN( , mov r30=r0, , ) diff --git a/arch/ia64/kernel/module.c b/arch/ia64/kernel/module.c new file mode 100644 index 000000000000..3661135da9d9 --- /dev/null +++ b/arch/ia64/kernel/module.c @@ -0,0 +1,959 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IA-64-specific support for kernel module loader. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * Loosely based on patch by Rusty Russell. + */ + +/* relocs tested so far: + + DIR64LSB + FPTR64LSB + GPREL22 + LDXMOV + LDXMOV + LTOFF22 + LTOFF22X + LTOFF22X + LTOFF_FPTR22 + PCREL21B (for br.call only; br.cond is not supported out of modules!) + PCREL60B (for brl.cond only; brl.call is not supported for modules!) + PCREL64LSB + SECREL32LSB + SEGREL64LSB + */ + + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define ARCH_MODULE_DEBUG 0 + +#if ARCH_MODULE_DEBUG +# define DEBUGP printk +# define inline +#else +# define DEBUGP(fmt , a...) +#endif + +#ifdef CONFIG_ITANIUM +# define USE_BRL 0 +#else +# define USE_BRL 1 +#endif + +#define MAX_LTOFF ((uint64_t) (1 << 22)) /* max. allowable linkage-table offset */ + +/* Define some relocation helper macros/types: */ + +#define FORMAT_SHIFT 0 +#define FORMAT_BITS 3 +#define FORMAT_MASK ((1 << FORMAT_BITS) - 1) +#define VALUE_SHIFT 3 +#define VALUE_BITS 5 +#define VALUE_MASK ((1 << VALUE_BITS) - 1) + +enum reloc_target_format { + /* direct encoded formats: */ + RF_NONE = 0, + RF_INSN14 = 1, + RF_INSN22 = 2, + RF_INSN64 = 3, + RF_32MSB = 4, + RF_32LSB = 5, + RF_64MSB = 6, + RF_64LSB = 7, + + /* formats that cannot be directly decoded: */ + RF_INSN60, + RF_INSN21B, /* imm21 form 1 */ + RF_INSN21M, /* imm21 form 2 */ + RF_INSN21F /* imm21 form 3 */ +}; + +enum reloc_value_formula { + RV_DIRECT = 4, /* S + A */ + RV_GPREL = 5, /* @gprel(S + A) */ + RV_LTREL = 6, /* @ltoff(S + A) */ + RV_PLTREL = 7, /* @pltoff(S + A) */ + RV_FPTR = 8, /* @fptr(S + A) */ + RV_PCREL = 9, /* S + A - P */ + RV_LTREL_FPTR = 10, /* @ltoff(@fptr(S + A)) */ + RV_SEGREL = 11, /* @segrel(S + A) */ + RV_SECREL = 12, /* @secrel(S + A) */ + RV_BDREL = 13, /* BD + A */ + RV_LTV = 14, /* S + A (like RV_DIRECT, except frozen at static link-time) */ + RV_PCREL2 = 15, /* S + A - P */ + RV_SPECIAL = 16, /* various (see below) */ + RV_RSVD17 = 17, + RV_TPREL = 18, /* @tprel(S + A) */ + RV_LTREL_TPREL = 19, /* @ltoff(@tprel(S + A)) */ + RV_DTPMOD = 20, /* @dtpmod(S + A) */ + RV_LTREL_DTPMOD = 21, /* @ltoff(@dtpmod(S + A)) */ + RV_DTPREL = 22, /* @dtprel(S + A) */ + RV_LTREL_DTPREL = 23, /* @ltoff(@dtprel(S + A)) */ + RV_RSVD24 = 24, + RV_RSVD25 = 25, + RV_RSVD26 = 26, + RV_RSVD27 = 27 + /* 28-31 reserved for implementation-specific purposes. */ +}; + +#define N(reloc) [R_IA64_##reloc] = #reloc + +static const char *reloc_name[256] = { + N(NONE), N(IMM14), N(IMM22), N(IMM64), + N(DIR32MSB), N(DIR32LSB), N(DIR64MSB), N(DIR64LSB), + N(GPREL22), N(GPREL64I), N(GPREL32MSB), N(GPREL32LSB), + N(GPREL64MSB), N(GPREL64LSB), N(LTOFF22), N(LTOFF64I), + N(PLTOFF22), N(PLTOFF64I), N(PLTOFF64MSB), N(PLTOFF64LSB), + N(FPTR64I), N(FPTR32MSB), N(FPTR32LSB), N(FPTR64MSB), + N(FPTR64LSB), N(PCREL60B), N(PCREL21B), N(PCREL21M), + N(PCREL21F), N(PCREL32MSB), N(PCREL32LSB), N(PCREL64MSB), + N(PCREL64LSB), N(LTOFF_FPTR22), N(LTOFF_FPTR64I), N(LTOFF_FPTR32MSB), + N(LTOFF_FPTR32LSB), N(LTOFF_FPTR64MSB), N(LTOFF_FPTR64LSB), N(SEGREL32MSB), + N(SEGREL32LSB), N(SEGREL64MSB), N(SEGREL64LSB), N(SECREL32MSB), + N(SECREL32LSB), N(SECREL64MSB), N(SECREL64LSB), N(REL32MSB), + N(REL32LSB), N(REL64MSB), N(REL64LSB), N(LTV32MSB), + N(LTV32LSB), N(LTV64MSB), N(LTV64LSB), N(PCREL21BI), + N(PCREL22), N(PCREL64I), N(IPLTMSB), N(IPLTLSB), + N(COPY), N(LTOFF22X), N(LDXMOV), N(TPREL14), + N(TPREL22), N(TPREL64I), N(TPREL64MSB), N(TPREL64LSB), + N(LTOFF_TPREL22), N(DTPMOD64MSB), N(DTPMOD64LSB), N(LTOFF_DTPMOD22), + N(DTPREL14), N(DTPREL22), N(DTPREL64I), N(DTPREL32MSB), + N(DTPREL32LSB), N(DTPREL64MSB), N(DTPREL64LSB), N(LTOFF_DTPREL22) +}; + +#undef N + +/* Opaque struct for insns, to protect against derefs. */ +struct insn; + +static inline uint64_t +bundle (const struct insn *insn) +{ + return (uint64_t) insn & ~0xfUL; +} + +static inline int +slot (const struct insn *insn) +{ + return (uint64_t) insn & 0x3; +} + +static int +apply_imm64 (struct module *mod, struct insn *insn, uint64_t val) +{ + if (slot(insn) != 1 && slot(insn) != 2) { + printk(KERN_ERR "%s: invalid slot number %d for IMM64\n", + mod->name, slot(insn)); + return 0; + } + ia64_patch_imm64((u64) insn, val); + return 1; +} + +static int +apply_imm60 (struct module *mod, struct insn *insn, uint64_t val) +{ + if (slot(insn) != 1 && slot(insn) != 2) { + printk(KERN_ERR "%s: invalid slot number %d for IMM60\n", + mod->name, slot(insn)); + return 0; + } + if (val + ((uint64_t) 1 << 59) >= (1UL << 60)) { + printk(KERN_ERR "%s: value %ld out of IMM60 range\n", + mod->name, (long) val); + return 0; + } + ia64_patch_imm60((u64) insn, val); + return 1; +} + +static int +apply_imm22 (struct module *mod, struct insn *insn, uint64_t val) +{ + if (val + (1 << 21) >= (1 << 22)) { + printk(KERN_ERR "%s: value %li out of IMM22 range\n", + mod->name, (long)val); + return 0; + } + ia64_patch((u64) insn, 0x01fffcfe000UL, ( ((val & 0x200000UL) << 15) /* bit 21 -> 36 */ + | ((val & 0x1f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x00ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x00007fUL) << 13) /* bit 0 -> 13 */)); + return 1; +} + +static int +apply_imm21b (struct module *mod, struct insn *insn, uint64_t val) +{ + if (val + (1 << 20) >= (1 << 21)) { + printk(KERN_ERR "%s: value %li out of IMM21b range\n", + mod->name, (long)val); + return 0; + } + ia64_patch((u64) insn, 0x11ffffe000UL, ( ((val & 0x100000UL) << 16) /* bit 20 -> 36 */ + | ((val & 0x0fffffUL) << 13) /* bit 0 -> 13 */)); + return 1; +} + +#if USE_BRL + +struct plt_entry { + /* Three instruction bundles in PLT. */ + unsigned char bundle[2][16]; +}; + +static const struct plt_entry ia64_plt_template = { + { + { + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ + 0x00, 0x00, 0x00, 0x60 + }, + { + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* brl.many gp=TARGET_GP */ + 0x08, 0x00, 0x00, 0xc0 + } + } +}; + +static int +patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) +{ + if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_gp) + && apply_imm60(mod, (struct insn *) (plt->bundle[1] + 2), + (target_ip - (int64_t) plt->bundle[1]) / 16)) + return 1; + return 0; +} + +unsigned long +plt_target (struct plt_entry *plt) +{ + uint64_t b0, b1, *b = (uint64_t *) plt->bundle[1]; + long off; + + b0 = b[0]; b1 = b[1]; + off = ( ((b1 & 0x00fffff000000000UL) >> 36) /* imm20b -> bit 0 */ + | ((b0 >> 48) << 20) | ((b1 & 0x7fffffUL) << 36) /* imm39 -> bit 20 */ + | ((b1 & 0x0800000000000000UL) << 0)); /* i -> bit 59 */ + return (long) plt->bundle[1] + 16*off; +} + +#else /* !USE_BRL */ + +struct plt_entry { + /* Three instruction bundles in PLT. */ + unsigned char bundle[3][16]; +}; + +static const struct plt_entry ia64_plt_template = { + { + { + 0x05, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* movl r16=TARGET_IP */ + 0x02, 0x00, 0x00, 0x60 + }, + { + 0x04, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MLX] nop.m 0 */ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x20, /* movl gp=TARGET_GP */ + 0x00, 0x00, 0x00, 0x60 + }, + { + 0x11, 0x00, 0x00, 0x00, 0x01, 0x00, /* [MIB] nop.m 0 */ + 0x60, 0x80, 0x04, 0x80, 0x03, 0x00, /* mov b6=r16 */ + 0x60, 0x00, 0x80, 0x00 /* br.few b6 */ + } + } +}; + +static int +patch_plt (struct module *mod, struct plt_entry *plt, long target_ip, unsigned long target_gp) +{ + if (apply_imm64(mod, (struct insn *) (plt->bundle[0] + 2), target_ip) + && apply_imm64(mod, (struct insn *) (plt->bundle[1] + 2), target_gp)) + return 1; + return 0; +} + +unsigned long +plt_target (struct plt_entry *plt) +{ + uint64_t b0, b1, *b = (uint64_t *) plt->bundle[0]; + + b0 = b[0]; b1 = b[1]; + return ( ((b1 & 0x000007f000000000) >> 36) /* imm7b -> bit 0 */ + | ((b1 & 0x07fc000000000000) >> 43) /* imm9d -> bit 7 */ + | ((b1 & 0x0003e00000000000) >> 29) /* imm5c -> bit 16 */ + | ((b1 & 0x0000100000000000) >> 23) /* ic -> bit 21 */ + | ((b0 >> 46) << 22) | ((b1 & 0x7fffff) << 40) /* imm41 -> bit 22 */ + | ((b1 & 0x0800000000000000) << 4)); /* i -> bit 63 */ +} + +#endif /* !USE_BRL */ + +void +module_arch_freeing_init (struct module *mod) +{ + if (mod->arch.init_unw_table) { + unw_remove_unwind_table(mod->arch.init_unw_table); + mod->arch.init_unw_table = NULL; + } +} + +/* Have we already seen one of these relocations? */ +/* FIXME: we could look in other sections, too --RR */ +static int +duplicate_reloc (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + if (rela[i].r_info == rela[num].r_info && rela[i].r_addend == rela[num].r_addend) + return 1; + } + return 0; +} + +/* Count how many GOT entries we may need */ +static unsigned int +count_gots (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_IA64_LTOFF22: + case R_IA64_LTOFF22X: + case R_IA64_LTOFF64I: + case R_IA64_LTOFF_FPTR22: + case R_IA64_LTOFF_FPTR64I: + case R_IA64_LTOFF_FPTR32MSB: + case R_IA64_LTOFF_FPTR32LSB: + case R_IA64_LTOFF_FPTR64MSB: + case R_IA64_LTOFF_FPTR64LSB: + if (!duplicate_reloc(rela, i)) + ret++; + break; + } + } + return ret; +} + +/* Count how many PLT entries we may need */ +static unsigned int +count_plts (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not + time critical */ + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_IA64_PCREL21B: + case R_IA64_PLTOFF22: + case R_IA64_PLTOFF64I: + case R_IA64_PLTOFF64MSB: + case R_IA64_PLTOFF64LSB: + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + if (!duplicate_reloc(rela, i)) + ret++; + break; + } + } + return ret; +} + +/* We need to create an function-descriptors for any internal function + which is referenced. */ +static unsigned int +count_fdescs (const Elf64_Rela *rela, unsigned int num) +{ + unsigned int i, ret = 0; + + /* Sure, this is order(n^2), but it's usually short, and not time critical. */ + for (i = 0; i < num; i++) { + switch (ELF64_R_TYPE(rela[i].r_info)) { + case R_IA64_FPTR64I: + case R_IA64_FPTR32LSB: + case R_IA64_FPTR32MSB: + case R_IA64_FPTR64LSB: + case R_IA64_FPTR64MSB: + case R_IA64_LTOFF_FPTR22: + case R_IA64_LTOFF_FPTR32LSB: + case R_IA64_LTOFF_FPTR32MSB: + case R_IA64_LTOFF_FPTR64I: + case R_IA64_LTOFF_FPTR64LSB: + case R_IA64_LTOFF_FPTR64MSB: + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + /* + * Jumps to static functions sometimes go straight to their + * offset. Of course, that may not be possible if the jump is + * from init -> core or vice. versa, so we need to generate an + * FDESC (and PLT etc) for that. + */ + case R_IA64_PCREL21B: + if (!duplicate_reloc(rela, i)) + ret++; + break; + } + } + return ret; +} + +int +module_frob_arch_sections (Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, char *secstrings, + struct module *mod) +{ + unsigned long core_plts = 0, init_plts = 0, gots = 0, fdescs = 0; + Elf64_Shdr *s, *sechdrs_end = sechdrs + ehdr->e_shnum; + + /* + * To store the PLTs and function-descriptors, we expand the .text section for + * core module-code and the .init.text section for initialization code. + */ + for (s = sechdrs; s < sechdrs_end; ++s) + if (strcmp(".core.plt", secstrings + s->sh_name) == 0) + mod->arch.core_plt = s; + else if (strcmp(".init.plt", secstrings + s->sh_name) == 0) + mod->arch.init_plt = s; + else if (strcmp(".got", secstrings + s->sh_name) == 0) + mod->arch.got = s; + else if (strcmp(".opd", secstrings + s->sh_name) == 0) + mod->arch.opd = s; + else if (strcmp(".IA_64.unwind", secstrings + s->sh_name) == 0) + mod->arch.unwind = s; + + if (!mod->arch.core_plt || !mod->arch.init_plt || !mod->arch.got || !mod->arch.opd) { + printk(KERN_ERR "%s: sections missing\n", mod->name); + return -ENOEXEC; + } + + /* GOT and PLTs can occur in any relocated section... */ + for (s = sechdrs + 1; s < sechdrs_end; ++s) { + const Elf64_Rela *rels = (void *)ehdr + s->sh_offset; + unsigned long numrels = s->sh_size/sizeof(Elf64_Rela); + + if (s->sh_type != SHT_RELA) + continue; + + gots += count_gots(rels, numrels); + fdescs += count_fdescs(rels, numrels); + if (strstr(secstrings + s->sh_name, ".init")) + init_plts += count_plts(rels, numrels); + else + core_plts += count_plts(rels, numrels); + } + + mod->arch.core_plt->sh_type = SHT_NOBITS; + mod->arch.core_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.core_plt->sh_addralign = 16; + mod->arch.core_plt->sh_size = core_plts * sizeof(struct plt_entry); + mod->arch.init_plt->sh_type = SHT_NOBITS; + mod->arch.init_plt->sh_flags = SHF_EXECINSTR | SHF_ALLOC; + mod->arch.init_plt->sh_addralign = 16; + mod->arch.init_plt->sh_size = init_plts * sizeof(struct plt_entry); + mod->arch.got->sh_type = SHT_NOBITS; + mod->arch.got->sh_flags = ARCH_SHF_SMALL | SHF_ALLOC; + mod->arch.got->sh_addralign = 8; + mod->arch.got->sh_size = gots * sizeof(struct got_entry); + mod->arch.opd->sh_type = SHT_NOBITS; + mod->arch.opd->sh_flags = SHF_ALLOC; + mod->arch.opd->sh_addralign = 8; + mod->arch.opd->sh_size = fdescs * sizeof(struct fdesc); + DEBUGP("%s: core.plt=%lx, init.plt=%lx, got=%lx, fdesc=%lx\n", + __func__, mod->arch.core_plt->sh_size, mod->arch.init_plt->sh_size, + mod->arch.got->sh_size, mod->arch.opd->sh_size); + return 0; +} + +static inline bool +in_init (const struct module *mod, uint64_t addr) +{ + return within_module_init(addr, mod); +} + +static inline bool +in_core (const struct module *mod, uint64_t addr) +{ + return within_module_core(addr, mod); +} + +static inline bool +is_internal (const struct module *mod, uint64_t value) +{ + return in_init(mod, value) || in_core(mod, value); +} + +/* + * Get gp-relative offset for the linkage-table entry of VALUE. + */ +static uint64_t +get_ltoff (struct module *mod, uint64_t value, int *okp) +{ + struct got_entry *got, *e; + + if (!*okp) + return 0; + + got = (void *) mod->arch.got->sh_addr; + for (e = got; e < got + mod->arch.next_got_entry; ++e) + if (e->val == value) + goto found; + + /* Not enough GOT entries? */ + BUG_ON(e >= (struct got_entry *) (mod->arch.got->sh_addr + mod->arch.got->sh_size)); + + e->val = value; + ++mod->arch.next_got_entry; + found: + return (uint64_t) e - mod->arch.gp; +} + +static inline int +gp_addressable (struct module *mod, uint64_t value) +{ + return value - mod->arch.gp + MAX_LTOFF/2 < MAX_LTOFF; +} + +/* Get PC-relative PLT entry for this value. Returns 0 on failure. */ +static uint64_t +get_plt (struct module *mod, const struct insn *insn, uint64_t value, int *okp) +{ + struct plt_entry *plt, *plt_end; + uint64_t target_ip, target_gp; + + if (!*okp) + return 0; + + if (in_init(mod, (uint64_t) insn)) { + plt = (void *) mod->arch.init_plt->sh_addr; + plt_end = (void *) plt + mod->arch.init_plt->sh_size; + } else { + plt = (void *) mod->arch.core_plt->sh_addr; + plt_end = (void *) plt + mod->arch.core_plt->sh_size; + } + + /* "value" is a pointer to a function-descriptor; fetch the target ip/gp from it: */ + target_ip = ((uint64_t *) value)[0]; + target_gp = ((uint64_t *) value)[1]; + + /* Look for existing PLT entry. */ + while (plt->bundle[0][0]) { + if (plt_target(plt) == target_ip) + goto found; + if (++plt >= plt_end) + BUG(); + } + *plt = ia64_plt_template; + if (!patch_plt(mod, plt, target_ip, target_gp)) { + *okp = 0; + return 0; + } +#if ARCH_MODULE_DEBUG + if (plt_target(plt) != target_ip) { + printk("%s: mistargeted PLT: wanted %lx, got %lx\n", + __func__, target_ip, plt_target(plt)); + *okp = 0; + return 0; + } +#endif + found: + return (uint64_t) plt; +} + +/* Get function descriptor for VALUE. */ +static uint64_t +get_fdesc (struct module *mod, uint64_t value, int *okp) +{ + struct fdesc *fdesc = (void *) mod->arch.opd->sh_addr; + + if (!*okp) + return 0; + + if (!value) { + printk(KERN_ERR "%s: fdesc for zero requested!\n", mod->name); + return 0; + } + + if (!is_internal(mod, value)) + /* + * If it's not a module-local entry-point, "value" already points to a + * function-descriptor. + */ + return value; + + /* Look for existing function descriptor. */ + while (fdesc->addr) { + if (fdesc->addr == value) + return (uint64_t)fdesc; + if ((uint64_t) ++fdesc >= mod->arch.opd->sh_addr + mod->arch.opd->sh_size) + BUG(); + } + + /* Create new one */ + fdesc->addr = value; + fdesc->gp = mod->arch.gp; + return (uint64_t) fdesc; +} + +static inline int +do_reloc (struct module *mod, uint8_t r_type, Elf64_Sym *sym, uint64_t addend, + Elf64_Shdr *sec, void *location) +{ + enum reloc_target_format format = (r_type >> FORMAT_SHIFT) & FORMAT_MASK; + enum reloc_value_formula formula = (r_type >> VALUE_SHIFT) & VALUE_MASK; + uint64_t val; + int ok = 1; + + val = sym->st_value + addend; + + switch (formula) { + case RV_SEGREL: /* segment base is arbitrarily chosen to be 0 for kernel modules */ + case RV_DIRECT: + break; + + case RV_GPREL: val -= mod->arch.gp; break; + case RV_LTREL: val = get_ltoff(mod, val, &ok); break; + case RV_PLTREL: val = get_plt(mod, location, val, &ok); break; + case RV_FPTR: val = get_fdesc(mod, val, &ok); break; + case RV_SECREL: val -= sec->sh_addr; break; + case RV_LTREL_FPTR: val = get_ltoff(mod, get_fdesc(mod, val, &ok), &ok); break; + + case RV_PCREL: + switch (r_type) { + case R_IA64_PCREL21B: + if ((in_init(mod, val) && in_core(mod, (uint64_t)location)) || + (in_core(mod, val) && in_init(mod, (uint64_t)location))) { + /* + * Init section may have been allocated far away from core, + * if the branch won't reach, then allocate a plt for it. + */ + uint64_t delta = ((int64_t)val - (int64_t)location) / 16; + if (delta + (1 << 20) >= (1 << 21)) { + val = get_fdesc(mod, val, &ok); + val = get_plt(mod, location, val, &ok); + } + } else if (!is_internal(mod, val)) + val = get_plt(mod, location, val, &ok); + fallthrough; + default: + val -= bundle(location); + break; + + case R_IA64_PCREL32MSB: + case R_IA64_PCREL32LSB: + case R_IA64_PCREL64MSB: + case R_IA64_PCREL64LSB: + val -= (uint64_t) location; + break; + + } + switch (r_type) { + case R_IA64_PCREL60B: format = RF_INSN60; break; + case R_IA64_PCREL21B: format = RF_INSN21B; break; + case R_IA64_PCREL21M: format = RF_INSN21M; break; + case R_IA64_PCREL21F: format = RF_INSN21F; break; + default: break; + } + break; + + case RV_BDREL: + val -= (uint64_t) (in_init(mod, val) ? mod->mem[MOD_INIT_TEXT].base : + mod->mem[MOD_TEXT].base); + break; + + case RV_LTV: + /* can link-time value relocs happen here? */ + BUG(); + break; + + case RV_PCREL2: + if (r_type == R_IA64_PCREL21BI) { + if (!is_internal(mod, val)) { + printk(KERN_ERR "%s: %s reloc against " + "non-local symbol (%lx)\n", __func__, + reloc_name[r_type], (unsigned long)val); + return -ENOEXEC; + } + format = RF_INSN21B; + } + val -= bundle(location); + break; + + case RV_SPECIAL: + switch (r_type) { + case R_IA64_IPLTMSB: + case R_IA64_IPLTLSB: + val = get_fdesc(mod, get_plt(mod, location, val, &ok), &ok); + format = RF_64LSB; + if (r_type == R_IA64_IPLTMSB) + format = RF_64MSB; + break; + + case R_IA64_SUB: + val = addend - sym->st_value; + format = RF_INSN64; + break; + + case R_IA64_LTOFF22X: + if (gp_addressable(mod, val)) + val -= mod->arch.gp; + else + val = get_ltoff(mod, val, &ok); + format = RF_INSN22; + break; + + case R_IA64_LDXMOV: + if (gp_addressable(mod, val)) { + /* turn "ld8" into "mov": */ + DEBUGP("%s: patching ld8 at %p to mov\n", __func__, location); + ia64_patch((u64) location, 0x1fff80fe000UL, 0x10000000000UL); + } + return 0; + + default: + if (reloc_name[r_type]) + printk(KERN_ERR "%s: special reloc %s not supported", + mod->name, reloc_name[r_type]); + else + printk(KERN_ERR "%s: unknown special reloc %x\n", + mod->name, r_type); + return -ENOEXEC; + } + break; + + case RV_TPREL: + case RV_LTREL_TPREL: + case RV_DTPMOD: + case RV_LTREL_DTPMOD: + case RV_DTPREL: + case RV_LTREL_DTPREL: + printk(KERN_ERR "%s: %s reloc not supported\n", + mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?"); + return -ENOEXEC; + + default: + printk(KERN_ERR "%s: unknown reloc %x\n", mod->name, r_type); + return -ENOEXEC; + } + + if (!ok) + return -ENOEXEC; + + DEBUGP("%s: [%p]<-%016lx = %s(%lx)\n", __func__, location, val, + reloc_name[r_type] ? reloc_name[r_type] : "?", sym->st_value + addend); + + switch (format) { + case RF_INSN21B: ok = apply_imm21b(mod, location, (int64_t) val / 16); break; + case RF_INSN22: ok = apply_imm22(mod, location, val); break; + case RF_INSN64: ok = apply_imm64(mod, location, val); break; + case RF_INSN60: ok = apply_imm60(mod, location, (int64_t) val / 16); break; + case RF_32LSB: put_unaligned(val, (uint32_t *) location); break; + case RF_64LSB: put_unaligned(val, (uint64_t *) location); break; + case RF_32MSB: /* ia64 Linux is little-endian... */ + case RF_64MSB: /* ia64 Linux is little-endian... */ + case RF_INSN14: /* must be within-module, i.e., resolved by "ld -r" */ + case RF_INSN21M: /* must be within-module, i.e., resolved by "ld -r" */ + case RF_INSN21F: /* must be within-module, i.e., resolved by "ld -r" */ + printk(KERN_ERR "%s: format %u needed by %s reloc is not supported\n", + mod->name, format, reloc_name[r_type] ? reloc_name[r_type] : "?"); + return -ENOEXEC; + + default: + printk(KERN_ERR "%s: relocation %s resulted in unknown format %u\n", + mod->name, reloc_name[r_type] ? reloc_name[r_type] : "?", format); + return -ENOEXEC; + } + return ok ? 0 : -ENOEXEC; +} + +int +apply_relocate_add (Elf64_Shdr *sechdrs, const char *strtab, unsigned int symindex, + unsigned int relsec, struct module *mod) +{ + unsigned int i, n = sechdrs[relsec].sh_size / sizeof(Elf64_Rela); + Elf64_Rela *rela = (void *) sechdrs[relsec].sh_addr; + Elf64_Shdr *target_sec; + int ret; + + DEBUGP("%s: applying section %u (%u relocs) to %u\n", __func__, + relsec, n, sechdrs[relsec].sh_info); + + target_sec = sechdrs + sechdrs[relsec].sh_info; + + if (target_sec->sh_entsize == ~0UL) + /* + * If target section wasn't allocated, we don't need to relocate it. + * Happens, e.g., for debug sections. + */ + return 0; + + if (!mod->arch.gp) { + /* + * XXX Should have an arch-hook for running this after final section + * addresses have been selected... + */ + uint64_t gp; + struct module_memory *mod_mem; + + mod_mem = &mod->mem[MOD_DATA]; + if (mod_mem->size > MAX_LTOFF) + /* + * This takes advantage of fact that SHF_ARCH_SMALL gets allocated + * at the end of the module. + */ + gp = mod_mem->size - MAX_LTOFF / 2; + else + gp = mod_mem->size / 2; + gp = (uint64_t) mod_mem->base + ((gp + 7) & -8); + mod->arch.gp = gp; + DEBUGP("%s: placing gp at 0x%lx\n", __func__, gp); + } + + for (i = 0; i < n; i++) { + ret = do_reloc(mod, ELF64_R_TYPE(rela[i].r_info), + ((Elf64_Sym *) sechdrs[symindex].sh_addr + + ELF64_R_SYM(rela[i].r_info)), + rela[i].r_addend, target_sec, + (void *) target_sec->sh_addr + rela[i].r_offset); + if (ret < 0) + return ret; + } + return 0; +} + +/* + * Modules contain a single unwind table which covers both the core and the init text + * sections but since the two are not contiguous, we need to split this table up such that + * we can register (and unregister) each "segment" separately. Fortunately, this sounds + * more complicated than it really is. + */ +static void +register_unwind_table (struct module *mod) +{ + struct unw_table_entry *start = (void *) mod->arch.unwind->sh_addr; + struct unw_table_entry *end = start + mod->arch.unwind->sh_size / sizeof (*start); + struct unw_table_entry *e1, *e2, *core, *init; + unsigned long num_init = 0, num_core = 0; + + /* First, count how many init and core unwind-table entries there are. */ + for (e1 = start; e1 < end; ++e1) + if (in_init(mod, e1->start_offset)) + ++num_init; + else + ++num_core; + /* + * Second, sort the table such that all unwind-table entries for the init and core + * text sections are nicely separated. We do this with a stupid bubble sort + * (unwind tables don't get ridiculously huge). + */ + for (e1 = start; e1 < end; ++e1) { + for (e2 = e1 + 1; e2 < end; ++e2) { + if (e2->start_offset < e1->start_offset) { + swap(*e1, *e2); + } + } + } + /* + * Third, locate the init and core segments in the unwind table: + */ + if (in_init(mod, start->start_offset)) { + init = start; + core = start + num_init; + } else { + core = start; + init = start + num_core; + } + + DEBUGP("%s: name=%s, gp=%lx, num_init=%lu, num_core=%lu\n", __func__, + mod->name, mod->arch.gp, num_init, num_core); + + /* + * Fourth, register both tables (if not empty). + */ + if (num_core > 0) { + mod->arch.core_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, + core, core + num_core); + DEBUGP("%s: core: handle=%p [%p-%p)\n", __func__, + mod->arch.core_unw_table, core, core + num_core); + } + if (num_init > 0) { + mod->arch.init_unw_table = unw_add_unwind_table(mod->name, 0, mod->arch.gp, + init, init + num_init); + DEBUGP("%s: init: handle=%p [%p-%p)\n", __func__, + mod->arch.init_unw_table, init, init + num_init); + } +} + +int +module_finalize (const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) +{ + struct mod_arch_specific *mas = &mod->arch; + + DEBUGP("%s: init: entry=%p\n", __func__, mod->init); + if (mas->unwind) + register_unwind_table(mod); + + /* + * ".opd" was already relocated to the final destination. Store + * it's address for use in symbolizer. + */ + mas->opd_addr = (void *)mas->opd->sh_addr; + mas->opd_size = mas->opd->sh_size; + + /* + * Module relocation was already done at this point. Section + * headers are about to be deleted. Wipe out load-time context. + */ + mas->core_plt = NULL; + mas->init_plt = NULL; + mas->got = NULL; + mas->opd = NULL; + mas->unwind = NULL; + mas->gp = 0; + mas->next_got_entry = 0; + + return 0; +} + +void +module_arch_cleanup (struct module *mod) +{ + if (mod->arch.init_unw_table) { + unw_remove_unwind_table(mod->arch.init_unw_table); + mod->arch.init_unw_table = NULL; + } + if (mod->arch.core_unw_table) { + unw_remove_unwind_table(mod->arch.core_unw_table); + mod->arch.core_unw_table = NULL; + } +} + +void *dereference_module_function_descriptor(struct module *mod, void *ptr) +{ + struct mod_arch_specific *mas = &mod->arch; + + if (ptr < mas->opd_addr || ptr >= mas->opd_addr + mas->opd_size) + return ptr; + + return dereference_function_descriptor(ptr); +} diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c new file mode 100644 index 000000000000..025e5133c860 --- /dev/null +++ b/arch/ia64/kernel/msi_ia64.c @@ -0,0 +1,198 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MSI hooks for standard x86 apic + */ + +#include +#include +#include +#include +#include +#include + +static struct irq_chip ia64_msi_chip; + +#ifdef CONFIG_SMP +static int ia64_set_msi_irq_affinity(struct irq_data *idata, + const cpumask_t *cpu_mask, bool force) +{ + struct msi_msg msg; + u32 addr, data; + int cpu = cpumask_first_and(cpu_mask, cpu_online_mask); + unsigned int irq = idata->irq; + + if (irq_prepare_move(irq, cpu)) + return -1; + + __get_cached_msi_msg(irq_data_get_msi_desc(idata), &msg); + + addr = msg.address_lo; + addr &= MSI_ADDR_DEST_ID_MASK; + addr |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); + msg.address_lo = addr; + + data = msg.data; + data &= MSI_DATA_VECTOR_MASK; + data |= MSI_DATA_VECTOR(irq_to_vector(irq)); + msg.data = data; + + pci_write_msi_msg(irq, &msg); + irq_data_update_affinity(idata, cpumask_of(cpu)); + + return 0; +} +#endif /* CONFIG_SMP */ + +int arch_setup_msi_irq(struct pci_dev *pdev, struct msi_desc *desc) +{ + struct msi_msg msg; + unsigned long dest_phys_id; + int irq, vector; + + irq = create_irq(); + if (irq < 0) + return irq; + + irq_set_msi_desc(irq, desc); + dest_phys_id = cpu_physical_id(cpumask_any_and(&(irq_to_domain(irq)), + cpu_online_mask)); + vector = irq_to_vector(irq); + + msg.address_hi = 0; + msg.address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DEST_MODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID_CPU(dest_phys_id); + + msg.data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(vector); + + pci_write_msi_msg(irq, &msg); + irq_set_chip_and_handler(irq, &ia64_msi_chip, handle_edge_irq); + + return 0; +} + +void arch_teardown_msi_irq(unsigned int irq) +{ + destroy_irq(irq); +} + +static void ia64_ack_msi_irq(struct irq_data *data) +{ + irq_complete_move(data->irq); + irq_move_irq(data); + ia64_eoi(); +} + +static int ia64_msi_retrigger_irq(struct irq_data *data) +{ + unsigned int vector = irq_to_vector(data->irq); + ia64_resend_irq(vector); + + return 1; +} + +/* + * Generic ops used on most IA64 platforms. + */ +static struct irq_chip ia64_msi_chip = { + .name = "PCI-MSI", + .irq_mask = pci_msi_mask_irq, + .irq_unmask = pci_msi_unmask_irq, + .irq_ack = ia64_ack_msi_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = ia64_set_msi_irq_affinity, +#endif + .irq_retrigger = ia64_msi_retrigger_irq, +}; + +#ifdef CONFIG_INTEL_IOMMU +#ifdef CONFIG_SMP +static int dmar_msi_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + unsigned int irq = data->irq; + struct irq_cfg *cfg = irq_cfg + irq; + struct msi_msg msg; + int cpu = cpumask_first_and(mask, cpu_online_mask); + + if (irq_prepare_move(irq, cpu)) + return -1; + + dmar_msi_read(irq, &msg); + + msg.data &= ~MSI_DATA_VECTOR_MASK; + msg.data |= MSI_DATA_VECTOR(cfg->vector); + msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; + msg.address_lo |= MSI_ADDR_DEST_ID_CPU(cpu_physical_id(cpu)); + + dmar_msi_write(irq, &msg); + irq_data_update_affinity(data, mask); + + return 0; +} +#endif /* CONFIG_SMP */ + +static struct irq_chip dmar_msi_type = { + .name = "DMAR_MSI", + .irq_unmask = dmar_msi_unmask, + .irq_mask = dmar_msi_mask, + .irq_ack = ia64_ack_msi_irq, +#ifdef CONFIG_SMP + .irq_set_affinity = dmar_msi_set_affinity, +#endif + .irq_retrigger = ia64_msi_retrigger_irq, +}; + +static void +msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_msg *msg) +{ + struct irq_cfg *cfg = irq_cfg + irq; + unsigned dest; + + dest = cpu_physical_id(cpumask_first_and(&(irq_to_domain(irq)), + cpu_online_mask)); + + msg->address_hi = 0; + msg->address_lo = + MSI_ADDR_HEADER | + MSI_ADDR_DEST_MODE_PHYS | + MSI_ADDR_REDIRECTION_CPU | + MSI_ADDR_DEST_ID_CPU(dest); + + msg->data = + MSI_DATA_TRIGGER_EDGE | + MSI_DATA_LEVEL_ASSERT | + MSI_DATA_DELIVERY_FIXED | + MSI_DATA_VECTOR(cfg->vector); +} + +int dmar_alloc_hwirq(int id, int node, void *arg) +{ + int irq; + struct msi_msg msg; + + irq = create_irq(); + if (irq > 0) { + irq_set_handler_data(irq, arg); + irq_set_chip_and_handler_name(irq, &dmar_msi_type, + handle_edge_irq, "edge"); + msi_compose_msg(NULL, irq, &msg); + dmar_msi_write(irq, &msg); + } + + return irq; +} + +void dmar_free_hwirq(int irq) +{ + irq_set_handler_data(irq, NULL); + destroy_irq(irq); +} +#endif /* CONFIG_INTEL_IOMMU */ + diff --git a/arch/ia64/kernel/numa.c b/arch/ia64/kernel/numa.c new file mode 100644 index 000000000000..8a959f20662d --- /dev/null +++ b/arch/ia64/kernel/numa.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * + * ia64 kernel NUMA specific stuff + * + * Copyright (C) 2002 Erich Focht + * Copyright (C) 2004 Silicon Graphics, Inc. + * Jesse Barnes + */ +#include +#include +#include +#include + +u16 cpu_to_node_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_to_node_map); + +cpumask_t node_to_cpu_mask[MAX_NUMNODES] __cacheline_aligned; +EXPORT_SYMBOL(node_to_cpu_mask); + +void map_cpu_to_node(int cpu, int nid) +{ + int oldnid; + if (nid < 0) { /* just initialize by zero */ + cpu_to_node_map[cpu] = 0; + return; + } + /* sanity check first */ + oldnid = cpu_to_node_map[cpu]; + if (cpumask_test_cpu(cpu, &node_to_cpu_mask[oldnid])) { + return; /* nothing to do */ + } + /* we don't have cpu-driven node hot add yet... + In usual case, node is created from SRAT at boot time. */ + if (!node_online(nid)) + nid = first_online_node; + cpu_to_node_map[cpu] = nid; + cpumask_set_cpu(cpu, &node_to_cpu_mask[nid]); + return; +} + +void unmap_cpu_from_node(int cpu, int nid) +{ + WARN_ON(!cpumask_test_cpu(cpu, &node_to_cpu_mask[nid])); + WARN_ON(cpu_to_node_map[cpu] != nid); + cpu_to_node_map[cpu] = 0; + cpumask_clear_cpu(cpu, &node_to_cpu_mask[nid]); +} + + +/** + * build_cpu_to_node_map - setup cpu to node and node to cpumask arrays + * + * Build cpu to node mapping and initialize the per node cpu masks using + * info from the node_cpuid array handed to us by ACPI. + */ +void __init build_cpu_to_node_map(void) +{ + int cpu, i, node; + + for(node=0; node < MAX_NUMNODES; node++) + cpumask_clear(&node_to_cpu_mask[node]); + + for_each_possible_early_cpu(cpu) { + node = NUMA_NO_NODE; + for (i = 0; i < NR_CPUS; ++i) + if (cpu_physical_id(cpu) == node_cpuid[i].phys_id) { + node = node_cpuid[i].nid; + break; + } + map_cpu_to_node(cpu, node); + } +} diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S new file mode 100644 index 000000000000..fb6db6966f70 --- /dev/null +++ b/arch/ia64/kernel/pal.S @@ -0,0 +1,306 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * PAL Firmware support + * IA-64 Processor Programmers Reference Vol 2 + * + * Copyright (C) 1999 Don Dugger + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999-2001, 2003 Hewlett-Packard Co + * David Mosberger + * Stephane Eranian + * + * 05/22/2000 eranian Added support for stacked register calls + * 05/24/2000 eranian Added support for physical mode static calls + */ + +#include +#include +#include + + .data +pal_entry_point: + data8 ia64_pal_default_handler + .text + +/* + * Set the PAL entry point address. This could be written in C code, but we + * do it here to keep it all in one module (besides, it's so trivial that it's + * not a big deal). + * + * in0 Address of the PAL entry point (text address, NOT a function + * descriptor). + */ +GLOBAL_ENTRY(ia64_pal_handler_init) + alloc r3=ar.pfs,1,0,0,0 + movl r2=pal_entry_point + ;; + st8 [r2]=in0 + br.ret.sptk.many rp +END(ia64_pal_handler_init) + +/* + * Default PAL call handler. This needs to be coded in assembly because it + * uses the static calling convention, i.e., the RSE may not be used and + * calls are done via "br.cond" (not "br.call"). + */ +GLOBAL_ENTRY(ia64_pal_default_handler) + mov r8=-1 + br.cond.sptk.many rp +END(ia64_pal_default_handler) + +/* + * Make a PAL call using the static calling convention. + * + * in0 Index of PAL service + * in1 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_static) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,5,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 + mov r29 = in1 + mov r8 = ip + } + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + adds r8 = 1f-1b,r8 + mov loc4=ar.rsc // save RSE configuration + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov loc3 = psr + mov loc0 = rp + .body + mov r30 = in2 + + mov r31 = in3 + mov b7 = loc2 + + rsm psr.i + ;; + mov rp = r8 + br.cond.sptk.many b7 +1: mov psr.l = loc3 + mov ar.rsc = loc4 // restore RSE configuration + mov ar.pfs = loc1 + mov rp = loc0 + ;; + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_static) +EXPORT_SYMBOL(ia64_pal_call_static) + +/* + * Make a PAL call using the stacked registers calling convention. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_stacked) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,4,4,0 + movl loc2 = pal_entry_point + + mov r28 = in0 // Index MUST be copied to r28 + mov out0 = in0 // AND in0 of PAL function + mov loc0 = rp + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov out1 = in1 + mov out2 = in2 + mov out3 = in3 + mov loc3 = psr + ;; + rsm psr.i + mov b7 = loc2 + ;; + br.call.sptk.many rp=b7 // now make the call +.ret0: mov psr.l = loc3 + mov ar.pfs = loc1 + mov rp = loc0 + ;; + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_stacked) +EXPORT_SYMBOL(ia64_pal_call_stacked) + +/* + * Make a physical mode PAL call using the static registers calling convention. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaining PAL arguments + * + * PSR_LP, PSR_TB, PSR_ID, PSR_DA are never set by the kernel. + * So we don't need to clear them. + */ +#define PAL_PSR_BITS_TO_CLEAR \ + (IA64_PSR_I | IA64_PSR_IT | IA64_PSR_DT | IA64_PSR_DB | IA64_PSR_RT |\ + IA64_PSR_DD | IA64_PSR_SS | IA64_PSR_RI | IA64_PSR_ED | \ + IA64_PSR_DFL | IA64_PSR_DFH) + +#define PAL_PSR_BITS_TO_SET \ + (IA64_PSR_BN) + + +GLOBAL_ENTRY(ia64_pal_call_phys_static) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,7,0,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov r8 = ip // save ip to compute branch + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov r29 = in1 // first argument + mov r30 = in2 // copy arg2 + mov r31 = in3 // copy arg3 + ;; + mov loc3 = psr // save psr + adds r8 = 1f-1b,r8 // calculate return address for call + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + tpa r8=r8 // convert rp to physical + ;; + mov b7 = loc2 // install target to branch reg + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys + mov rp = r8 // install return address (physical) + mov loc5 = r19 + mov loc6 = r20 + br.cond.sptk.many b7 +1: + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode + mov psr.l = loc3 // restore init PSR + + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_phys_static) +EXPORT_SYMBOL(ia64_pal_call_phys_static) + +/* + * Make a PAL call using the stacked registers in physical mode. + * + * Inputs: + * in0 Index of PAL service + * in2 - in3 Remaining PAL arguments + */ +GLOBAL_ENTRY(ia64_pal_call_phys_stacked) + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) + alloc loc1 = ar.pfs,5,7,4,0 + movl loc2 = pal_entry_point +1: { + mov r28 = in0 // copy procedure index + mov loc0 = rp // save rp + } + .body + ;; + ld8 loc2 = [loc2] // loc2 <- entry point + mov loc3 = psr // save psr + ;; + mov loc4=ar.rsc // save RSE configuration + dep.z loc2=loc2,0,61 // convert pal entry point to physical + ;; + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + movl r16=PAL_PSR_BITS_TO_CLEAR + movl r17=PAL_PSR_BITS_TO_SET + ;; + or loc3=loc3,r17 // add in psr the bits to set + mov b7 = loc2 // install target to branch reg + ;; + andcm r16=loc3,r16 // removes bits to clear from psr + br.call.sptk.many rp=ia64_switch_mode_phys + + mov out0 = in0 // first argument + mov out1 = in1 // copy arg2 + mov out2 = in2 // copy arg3 + mov out3 = in3 // copy arg3 + mov loc5 = r19 + mov loc6 = r20 + + br.call.sptk.many rp=b7 // now make the call + + mov ar.rsc=0 // put RSE in enforced lazy, LE mode + mov r16=loc3 // r16= original psr + mov r19=loc5 + mov r20=loc6 + br.call.sptk.many rp=ia64_switch_mode_virt // return to virtual mode + + mov psr.l = loc3 // restore init PSR + mov ar.pfs = loc1 + mov rp = loc0 + ;; + mov ar.rsc=loc4 // restore RSE configuration + srlz.d // serialize restoration of psr.l + br.ret.sptk.many b0 +END(ia64_pal_call_phys_stacked) +EXPORT_SYMBOL(ia64_pal_call_phys_stacked) + +/* + * Save scratch fp scratch regs which aren't saved in pt_regs already + * (fp10-fp15). + * + * NOTE: We need to do this since firmware (SAL and PAL) may use any of the + * scratch regs fp-low partition. + * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_save_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + stf.spill [in0] = f10,32 + stf.spill [r2] = f11,32 + ;; + stf.spill [in0] = f12,32 + stf.spill [r2] = f13,32 + ;; + stf.spill [in0] = f14,32 + stf.spill [r2] = f15,32 + br.ret.sptk.many rp +END(ia64_save_scratch_fpregs) +EXPORT_SYMBOL(ia64_save_scratch_fpregs) + +/* + * Load scratch fp scratch regs (fp10-fp15) + * + * Inputs: + * in0 Address of stack storage for fp regs + */ +GLOBAL_ENTRY(ia64_load_scratch_fpregs) + alloc r3=ar.pfs,1,0,0,0 + add r2=16,in0 + ;; + ldf.fill f10 = [in0],32 + ldf.fill f11 = [r2],32 + ;; + ldf.fill f12 = [in0],32 + ldf.fill f13 = [r2],32 + ;; + ldf.fill f14 = [in0],32 + ldf.fill f15 = [r2],32 + br.ret.sptk.many rp +END(ia64_load_scratch_fpregs) +EXPORT_SYMBOL(ia64_load_scratch_fpregs) diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c new file mode 100644 index 000000000000..b9ae093bfe37 --- /dev/null +++ b/arch/ia64/kernel/palinfo.c @@ -0,0 +1,942 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * palinfo.c + * + * Prints processor specific information reported by PAL. + * This code is based on specification of PAL as of the + * Intel IA-64 Architecture Software Developer's Manual v1.0. + * + * + * Copyright (C) 2000-2001, 2003 Hewlett-Packard Co + * Stephane Eranian + * Copyright (C) 2004 Intel Corporation + * Ashok Raj + * + * 05/26/2000 S.Eranian initial release + * 08/21/2000 S.Eranian updated to July 2000 PAL specs + * 02/05/2001 S.Eranian fixed module support + * 10/23/2001 S.Eranian updated pal_perf_mon_info bug fixes + * 03/24/2004 Ashok Raj updated to work with CPU Hotplug + * 10/26/2006 Russ Anderson updated processor features to rev 2.2 spec + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +MODULE_AUTHOR("Stephane Eranian "); +MODULE_DESCRIPTION("/proc interface to IA-64 PAL"); +MODULE_LICENSE("GPL"); + +#define PALINFO_VERSION "0.5" + +typedef int (*palinfo_func_t)(struct seq_file *); + +typedef struct { + const char *name; /* name of the proc entry */ + palinfo_func_t proc_read; /* function to call for reading */ + struct proc_dir_entry *entry; /* registered entry (removal) */ +} palinfo_entry_t; + + +/* + * A bunch of string array to get pretty printing + */ + +static const char *cache_types[] = { + "", /* not used */ + "Instruction", + "Data", + "Data/Instruction" /* unified */ +}; + +static const char *cache_mattrib[]={ + "WriteThrough", + "WriteBack", + "", /* reserved */ + "" /* reserved */ +}; + +static const char *cache_st_hints[]={ + "Temporal, level 1", + "Reserved", + "Reserved", + "Non-temporal, all levels", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +static const char *cache_ld_hints[]={ + "Temporal, level 1", + "Non-temporal, level 1", + "Reserved", + "Non-temporal, all levels", + "Reserved", + "Reserved", + "Reserved", + "Reserved" +}; + +static const char *rse_hints[]={ + "enforced lazy", + "eager stores", + "eager loads", + "eager loads and stores" +}; + +#define RSE_HINTS_COUNT ARRAY_SIZE(rse_hints) + +static const char *mem_attrib[]={ + "WB", /* 000 */ + "SW", /* 001 */ + "010", /* 010 */ + "011", /* 011 */ + "UC", /* 100 */ + "UCE", /* 101 */ + "WC", /* 110 */ + "NaTPage" /* 111 */ +}; + +/* + * Take a 64bit vector and produces a string such that + * if bit n is set then 2^n in clear text is generated. The adjustment + * to the right unit is also done. + * + * Input: + * - a pointer to a buffer to hold the string + * - a 64-bit vector + * Output: + * - a pointer to the end of the buffer + * + */ +static void bitvector_process(struct seq_file *m, u64 vector) +{ + int i,j; + static const char *units[]={ "", "K", "M", "G", "T" }; + + for (i=0, j=0; i < 64; i++ , j=i/10) { + if (vector & 0x1) + seq_printf(m, "%d%s ", 1 << (i-j*10), units[j]); + vector >>= 1; + } +} + +/* + * Take a 64bit vector and produces a string such that + * if bit n is set then register n is present. The function + * takes into account consecutive registers and prints out ranges. + * + * Input: + * - a pointer to a buffer to hold the string + * - a 64-bit vector + * Ouput: + * - a pointer to the end of the buffer + * + */ +static void bitregister_process(struct seq_file *m, u64 *reg_info, int max) +{ + int i, begin, skip = 0; + u64 value = reg_info[0]; + + value >>= i = begin = ffs(value) - 1; + + for(; i < max; i++ ) { + + if (i != 0 && (i%64) == 0) value = *++reg_info; + + if ((value & 0x1) == 0 && skip == 0) { + if (begin <= i - 2) + seq_printf(m, "%d-%d ", begin, i-1); + else + seq_printf(m, "%d ", i-1); + skip = 1; + begin = -1; + } else if ((value & 0x1) && skip == 1) { + skip = 0; + begin = i; + } + value >>=1; + } + if (begin > -1) { + if (begin < 127) + seq_printf(m, "%d-127", begin); + else + seq_puts(m, "127"); + } +} + +static int power_info(struct seq_file *m) +{ + s64 status; + u64 halt_info_buffer[8]; + pal_power_mgmt_info_u_t *halt_info =(pal_power_mgmt_info_u_t *)halt_info_buffer; + int i; + + status = ia64_pal_halt_info(halt_info); + if (status != 0) return 0; + + for (i=0; i < 8 ; i++ ) { + if (halt_info[i].pal_power_mgmt_info_s.im == 1) { + seq_printf(m, + "Power level %d:\n" + "\tentry_latency : %d cycles\n" + "\texit_latency : %d cycles\n" + "\tpower consumption : %d mW\n" + "\tCache+TLB coherency : %s\n", i, + halt_info[i].pal_power_mgmt_info_s.entry_latency, + halt_info[i].pal_power_mgmt_info_s.exit_latency, + halt_info[i].pal_power_mgmt_info_s.power_consumption, + halt_info[i].pal_power_mgmt_info_s.co ? "Yes" : "No"); + } else { + seq_printf(m,"Power level %d: not implemented\n", i); + } + } + return 0; +} + +static int cache_info(struct seq_file *m) +{ + unsigned long i, levels, unique_caches; + pal_cache_config_info_t cci; + int j, k; + long status; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return 0; + } + + seq_printf(m, "Cache levels : %ld\nUnique caches : %ld\n\n", + levels, unique_caches); + + for (i=0; i < levels; i++) { + for (j=2; j >0 ; j--) { + /* even without unification some level may not be present */ + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != 0) + continue; + + seq_printf(m, + "%s Cache level %lu:\n" + "\tSize : %u bytes\n" + "\tAttributes : ", + cache_types[j+cci.pcci_unified], i+1, + cci.pcci_cache_size); + + if (cci.pcci_unified) + seq_puts(m, "Unified "); + + seq_printf(m, "%s\n", cache_mattrib[cci.pcci_cache_attr]); + + seq_printf(m, + "\tAssociativity : %d\n" + "\tLine size : %d bytes\n" + "\tStride : %d bytes\n", + cci.pcci_assoc, + 1<>=1; + } + seq_puts(m, "\n\tLoad hints : "); + + for(k=0; k < 8; k++ ) { + if (cci.pcci_ld_hints & 0x1) + seq_printf(m, "[%s]", cache_ld_hints[k]); + cci.pcci_ld_hints >>=1; + } + seq_printf(m, + "\n\tAlias boundary : %d byte(s)\n" + "\tTag LSB : %d\n" + "\tTag MSB : %d\n", + 1<0 ; j--) { + tc_pages = 0; /* just in case */ + + /* even without unification, some levels may not be present */ + if ((status=ia64_pal_vm_info(i,j, &tc_info, &tc_pages)) != 0) + continue; + + seq_printf(m, + "\n%s Translation Cache Level %d:\n" + "\tHash sets : %d\n" + "\tAssociativity : %d\n" + "\tNumber of entries : %d\n" + "\tFlags : ", + cache_types[j+tc_info.tc_unified], i+1, + tc_info.tc_num_sets, + tc_info.tc_associativity, + tc_info.tc_num_entries); + + if (tc_info.tc_pf) + seq_puts(m, "PreferredPageSizeOptimized "); + if (tc_info.tc_unified) + seq_puts(m, "Unified "); + if (tc_info.tc_reduce_tr) + seq_puts(m, "TCReduction"); + + seq_puts(m, "\n\tSupported page sizes: "); + + bitvector_process(m, tc_pages); + + /* when unified date (j=2) is enough */ + if (tc_info.tc_unified) + break; + } + } + } + + seq_putc(m, '\n'); + return 0; +} + + +static int register_info(struct seq_file *m) +{ + u64 reg_info[2]; + u64 info; + unsigned long phys_stacked; + pal_hints_u_t hints; + unsigned long iregs, dregs; + static const char * const info_type[] = { + "Implemented AR(s)", + "AR(s) with read side-effects", + "Implemented CR(s)", + "CR(s) with read side-effects", + }; + + for(info=0; info < 4; info++) { + if (ia64_pal_register_info(info, ®_info[0], ®_info[1]) != 0) + return 0; + seq_printf(m, "%-32s : ", info_type[info]); + bitregister_process(m, reg_info, 128); + seq_putc(m, '\n'); + } + + if (ia64_pal_rse_info(&phys_stacked, &hints) == 0) + seq_printf(m, + "RSE stacked physical registers : %ld\n" + "RSE load/store hints : %ld (%s)\n", + phys_stacked, hints.ph_data, + hints.ph_data < RSE_HINTS_COUNT ? rse_hints[hints.ph_data]: "(??)"); + + if (ia64_pal_debug_info(&iregs, &dregs)) + return 0; + + seq_printf(m, + "Instruction debug register pairs : %ld\n" + "Data debug register pairs : %ld\n", iregs, dregs); + + return 0; +} + +static const char *const proc_features_0[]={ /* Feature set 0 */ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL, NULL,NULL,NULL,NULL, + "Unimplemented instruction address fault", + "INIT, PMI, and LINT pins", + "Simple unimplemented instr addresses", + "Variable P-state performance", + "Virtual machine features implemented", + "XIP,XPSR,XFS implemented", + "XR1-XR3 implemented", + "Disable dynamic predicate prediction", + "Disable processor physical number", + "Disable dynamic data cache prefetch", + "Disable dynamic inst cache prefetch", + "Disable dynamic branch prediction", + NULL, NULL, NULL, NULL, + "Disable P-states", + "Enable MCA on Data Poisoning", + "Enable vmsw instruction", + "Enable extern environmental notification", + "Disable BINIT on processor time-out", + "Disable dynamic power management (DPM)", + "Disable coherency", + "Disable cache", + "Enable CMCI promotion", + "Enable MCA to BINIT promotion", + "Enable MCA promotion", + "Enable BERR promotion" +}; + +static const char *const proc_features_16[]={ /* Feature set 16 */ + "Disable ETM", + "Enable ETM", + "Enable MCA on half-way timer", + "Enable snoop WC", + NULL, + "Enable Fast Deferral", + "Disable MCA on memory aliasing", + "Enable RSB", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + "DP system processor", + "Low Voltage", + "HT supported", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL +}; + +static const char *const *const proc_features[]={ + proc_features_0, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + proc_features_16, + NULL, NULL, NULL, NULL, +}; + +static void feature_set_info(struct seq_file *m, u64 avail, u64 status, u64 control, + unsigned long set) +{ + const char *const *vf, *const *v; + int i; + + vf = v = proc_features[set]; + for(i=0; i < 64; i++, avail >>=1, status >>=1, control >>=1) { + + if (!(control)) /* No remaining bits set */ + break; + if (!(avail & 0x1)) /* Print only bits that are available */ + continue; + if (vf) + v = vf + i; + if ( v && *v ) { + seq_printf(m, "%-40s : %s %s\n", *v, + avail & 0x1 ? (status & 0x1 ? + "On " : "Off"): "", + avail & 0x1 ? (control & 0x1 ? + "Ctrl" : "NoCtrl"): ""); + } else { + seq_printf(m, "Feature set %2ld bit %2d\t\t\t" + " : %s %s\n", + set, i, + avail & 0x1 ? (status & 0x1 ? + "On " : "Off"): "", + avail & 0x1 ? (control & 0x1 ? + "Ctrl" : "NoCtrl"): ""); + } + } +} + +static int processor_info(struct seq_file *m) +{ + u64 avail=1, status=1, control=1, feature_set=0; + s64 ret; + + do { + ret = ia64_pal_proc_get_features(&avail, &status, &control, + feature_set); + if (ret < 0) + return 0; + + if (ret == 1) { + feature_set++; + continue; + } + + feature_set_info(m, avail, status, control, feature_set); + feature_set++; + } while(1); + + return 0; +} + +static const char *const bus_features[]={ + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL, NULL,NULL, + NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL, + NULL,NULL, + "Request Bus Parking", + "Bus Lock Mask", + "Enable Half Transfer", + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "Enable Cache Line Repl. Shared", + "Enable Cache Line Repl. Exclusive", + "Disable Transaction Queuing", + "Disable Response Error Checking", + "Disable Bus Error Checking", + "Disable Bus Requester Internal Error Signalling", + "Disable Bus Requester Error Signalling", + "Disable Bus Initialization Event Checking", + "Disable Bus Initialization Event Signalling", + "Disable Bus Address Error Checking", + "Disable Bus Address Error Signalling", + "Disable Bus Data Error Checking" +}; + + +static int bus_info(struct seq_file *m) +{ + const char *const *v = bus_features; + pal_bus_features_u_t av, st, ct; + u64 avail, status, control; + int i; + s64 ret; + + if ((ret=ia64_pal_bus_get_features(&av, &st, &ct)) != 0) + return 0; + + avail = av.pal_bus_features_val; + status = st.pal_bus_features_val; + control = ct.pal_bus_features_val; + + for(i=0; i < 64; i++, v++, avail >>=1, status >>=1, control >>=1) { + if ( ! *v ) + continue; + seq_printf(m, "%-48s : %s%s %s\n", *v, + avail & 0x1 ? "" : "NotImpl", + avail & 0x1 ? (status & 0x1 ? "On" : "Off"): "", + avail & 0x1 ? (control & 0x1 ? "Ctrl" : "NoCtrl"): ""); + } + return 0; +} + +static int version_info(struct seq_file *m) +{ + pal_version_u_t min_ver, cur_ver; + + if (ia64_pal_version(&min_ver, &cur_ver) != 0) + return 0; + + seq_printf(m, + "PAL_vendor : 0x%02x (min=0x%02x)\n" + "PAL_A : %02x.%02x (min=%02x.%02x)\n" + "PAL_B : %02x.%02x (min=%02x.%02x)\n", + cur_ver.pal_version_s.pv_pal_vendor, + min_ver.pal_version_s.pv_pal_vendor, + cur_ver.pal_version_s.pv_pal_a_model, + cur_ver.pal_version_s.pv_pal_a_rev, + min_ver.pal_version_s.pv_pal_a_model, + min_ver.pal_version_s.pv_pal_a_rev, + cur_ver.pal_version_s.pv_pal_b_model, + cur_ver.pal_version_s.pv_pal_b_rev, + min_ver.pal_version_s.pv_pal_b_model, + min_ver.pal_version_s.pv_pal_b_rev); + return 0; +} + +static int frequency_info(struct seq_file *m) +{ + struct pal_freq_ratio proc, itc, bus; + unsigned long base; + + if (ia64_pal_freq_base(&base) == -1) + seq_puts(m, "Output clock : not implemented\n"); + else + seq_printf(m, "Output clock : %ld ticks/s\n", base); + + if (ia64_pal_freq_ratios(&proc, &bus, &itc) != 0) return 0; + + seq_printf(m, + "Processor/Clock ratio : %d/%d\n" + "Bus/Clock ratio : %d/%d\n" + "ITC/Clock ratio : %d/%d\n", + proc.num, proc.den, bus.num, bus.den, itc.num, itc.den); + return 0; +} + +static int tr_info(struct seq_file *m) +{ + long status; + pal_tr_valid_u_t tr_valid; + u64 tr_buffer[4]; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + unsigned long i, j; + unsigned long max[3], pgm; + struct ifa_reg { + unsigned long valid:1; + unsigned long ig:11; + unsigned long vpn:52; + } *ifa_reg; + struct itir_reg { + unsigned long rv1:2; + unsigned long ps:6; + unsigned long key:24; + unsigned long rv2:32; + } *itir_reg; + struct gr_reg { + unsigned long p:1; + unsigned long rv1:1; + unsigned long ma:3; + unsigned long a:1; + unsigned long d:1; + unsigned long pl:2; + unsigned long ar:3; + unsigned long ppn:38; + unsigned long rv2:2; + unsigned long ed:1; + unsigned long ig:11; + } *gr_reg; + struct rid_reg { + unsigned long ig1:1; + unsigned long rv1:1; + unsigned long ig2:6; + unsigned long rid:24; + unsigned long rv2:32; + } *rid_reg; + + if ((status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2)) !=0) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + return 0; + } + max[0] = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + max[1] = vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; + + for (i=0; i < 2; i++ ) { + for (j=0; j < max[i]; j++) { + + status = ia64_pal_tr_read(j, i, tr_buffer, &tr_valid); + if (status != 0) { + printk(KERN_ERR "palinfo: pal call failed on tr[%lu:%lu]=%ld\n", + i, j, status); + continue; + } + + ifa_reg = (struct ifa_reg *)&tr_buffer[2]; + + if (ifa_reg->valid == 0) + continue; + + gr_reg = (struct gr_reg *)tr_buffer; + itir_reg = (struct itir_reg *)&tr_buffer[1]; + rid_reg = (struct rid_reg *)&tr_buffer[3]; + + pgm = -1 << (itir_reg->ps - 12); + seq_printf(m, + "%cTR%lu: av=%d pv=%d dv=%d mv=%d\n" + "\tppn : 0x%lx\n" + "\tvpn : 0x%lx\n" + "\tps : ", + "ID"[i], j, + tr_valid.pal_tr_valid_s.access_rights_valid, + tr_valid.pal_tr_valid_s.priv_level_valid, + tr_valid.pal_tr_valid_s.dirty_bit_valid, + tr_valid.pal_tr_valid_s.mem_attr_valid, + (gr_reg->ppn & pgm)<< 12, (ifa_reg->vpn & pgm)<< 12); + + bitvector_process(m, 1<< itir_reg->ps); + + seq_printf(m, + "\n\tpl : %d\n" + "\tar : %d\n" + "\trid : %x\n" + "\tp : %d\n" + "\tma : %d\n" + "\td : %d\n", + gr_reg->pl, gr_reg->ar, rid_reg->rid, gr_reg->p, gr_reg->ma, + gr_reg->d); + } + } + return 0; +} + + + +/* + * List {name,function} pairs for every entry in /proc/palinfo/cpu* + */ +static const palinfo_entry_t palinfo_entries[]={ + { "version_info", version_info, }, + { "vm_info", vm_info, }, + { "cache_info", cache_info, }, + { "power_info", power_info, }, + { "register_info", register_info, }, + { "processor_info", processor_info, }, + { "frequency_info", frequency_info, }, + { "bus_info", bus_info }, + { "tr_info", tr_info, } +}; + +#define NR_PALINFO_ENTRIES (int) ARRAY_SIZE(palinfo_entries) + +static struct proc_dir_entry *palinfo_dir; + +/* + * This data structure is used to pass which cpu,function is being requested + * It must fit in a 64bit quantity to be passed to the proc callback routine + * + * In SMP mode, when we get a request for another CPU, we must call that + * other CPU using IPI and wait for the result before returning. + */ +typedef union { + u64 value; + struct { + unsigned req_cpu: 32; /* for which CPU this info is */ + unsigned func_id: 32; /* which function is requested */ + } pal_func_cpu; +} pal_func_cpu_u_t; + +#define req_cpu pal_func_cpu.req_cpu +#define func_id pal_func_cpu.func_id + +#ifdef CONFIG_SMP + +/* + * used to hold information about final function to call + */ +typedef struct { + palinfo_func_t func; /* pointer to function to call */ + struct seq_file *m; /* buffer to store results */ + int ret; /* return value from call */ +} palinfo_smp_data_t; + + +/* + * this function does the actual final call and he called + * from the smp code, i.e., this is the palinfo callback routine + */ +static void +palinfo_smp_call(void *info) +{ + palinfo_smp_data_t *data = (palinfo_smp_data_t *)info; + data->ret = (*data->func)(data->m); +} + +/* + * function called to trigger the IPI, we need to access a remote CPU + * Return: + * 0 : error or nothing to output + * otherwise how many bytes in the "page" buffer were written + */ +static +int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f) +{ + palinfo_smp_data_t ptr; + int ret; + + ptr.func = palinfo_entries[f->func_id].proc_read; + ptr.m = m; + ptr.ret = 0; /* just in case */ + + + /* will send IPI to other CPU and wait for completion of remote call */ + if ((ret=smp_call_function_single(f->req_cpu, palinfo_smp_call, &ptr, 1))) { + printk(KERN_ERR "palinfo: remote CPU call from %d to %d on function %d: " + "error %d\n", smp_processor_id(), f->req_cpu, f->func_id, ret); + return 0; + } + return ptr.ret; +} +#else /* ! CONFIG_SMP */ +static +int palinfo_handle_smp(struct seq_file *m, pal_func_cpu_u_t *f) +{ + printk(KERN_ERR "palinfo: should not be called with non SMP kernel\n"); + return 0; +} +#endif /* CONFIG_SMP */ + +/* + * Entry point routine: all calls go through this function + */ +static int proc_palinfo_show(struct seq_file *m, void *v) +{ + pal_func_cpu_u_t *f = (pal_func_cpu_u_t *)&m->private; + + /* + * in SMP mode, we may need to call another CPU to get correct + * information. PAL, by definition, is processor specific + */ + if (f->req_cpu == get_cpu()) + (*palinfo_entries[f->func_id].proc_read)(m); + else + palinfo_handle_smp(m, f); + + put_cpu(); + return 0; +} + +static int palinfo_add_proc(unsigned int cpu) +{ + pal_func_cpu_u_t f; + struct proc_dir_entry *cpu_dir; + int j; + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + sprintf(cpustr, "cpu%d", cpu); + + cpu_dir = proc_mkdir(cpustr, palinfo_dir); + if (!cpu_dir) + return -EINVAL; + + f.req_cpu = cpu; + + for (j=0; j < NR_PALINFO_ENTRIES; j++) { + f.func_id = j; + proc_create_single_data(palinfo_entries[j].name, 0, cpu_dir, + proc_palinfo_show, (void *)f.value); + } + return 0; +} + +static int palinfo_del_proc(unsigned int hcpu) +{ + char cpustr[3+4+1]; /* cpu numbers are up to 4095 on itanic */ + + sprintf(cpustr, "cpu%d", hcpu); + remove_proc_subtree(cpustr, palinfo_dir); + return 0; +} + +static enum cpuhp_state hp_online; + +static int __init palinfo_init(void) +{ + int i = 0; + + printk(KERN_INFO "PAL Information Facility v%s\n", PALINFO_VERSION); + palinfo_dir = proc_mkdir("pal", NULL); + if (!palinfo_dir) + return -ENOMEM; + + i = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/palinfo:online", + palinfo_add_proc, palinfo_del_proc); + if (i < 0) { + remove_proc_subtree("pal", NULL); + return i; + } + hp_online = i; + return 0; +} + +static void __exit palinfo_exit(void) +{ + cpuhp_remove_state(hp_online); + remove_proc_subtree("pal", NULL); +} + +module_init(palinfo_init); +module_exit(palinfo_exit); diff --git a/arch/ia64/kernel/patch.c b/arch/ia64/kernel/patch.c new file mode 100644 index 000000000000..7f21a8c57ed7 --- /dev/null +++ b/arch/ia64/kernel/patch.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Instruction-patching support. + * + * Copyright (C) 2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include + +#include +#include +#include +#include + +/* + * This was adapted from code written by Tony Luck: + * + * The 64-bit value in a "movl reg=value" is scattered between the two words of the bundle + * like this: + * + * 6 6 5 4 3 2 1 + * 3210987654321098765432109876543210987654321098765432109876543210 + * ABBBBBBBBBBBBBBBBBBBBBBBCCCCCCCCCCCCCCCCCCDEEEEEFFFFFFFFFGGGGGGG + * + * CCCCCCCCCCCCCCCCCCxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx + * xxxxAFFFFFFFFFEEEEEDxGGGGGGGxxxxxxxxxxxxxBBBBBBBBBBBBBBBBBBBBBBB + */ +static u64 +get_imm64 (u64 insn_addr) +{ + u64 *p = (u64 *) (insn_addr & -16); /* mask out slot number */ + + return ( (p[1] & 0x0800000000000000UL) << 4) | /*A*/ + ((p[1] & 0x00000000007fffffUL) << 40) | /*B*/ + ((p[0] & 0xffffc00000000000UL) >> 24) | /*C*/ + ((p[1] & 0x0000100000000000UL) >> 23) | /*D*/ + ((p[1] & 0x0003e00000000000UL) >> 29) | /*E*/ + ((p[1] & 0x07fc000000000000UL) >> 43) | /*F*/ + ((p[1] & 0x000007f000000000UL) >> 36); /*G*/ +} + +/* Patch instruction with "val" where "mask" has 1 bits. */ +void +ia64_patch (u64 insn_addr, u64 mask, u64 val) +{ + u64 m0, m1, v0, v1, b0, b1, *b = (u64 *) (insn_addr & -16); +# define insn_mask ((1UL << 41) - 1) + unsigned long shift; + + b0 = b[0]; b1 = b[1]; + shift = 5 + 41 * (insn_addr % 16); /* 5 bits of template, then 3 x 41-bit instructions */ + if (shift >= 64) { + m1 = mask << (shift - 64); + v1 = val << (shift - 64); + } else { + m0 = mask << shift; m1 = mask >> (64 - shift); + v0 = val << shift; v1 = val >> (64 - shift); + b[0] = (b0 & ~m0) | (v0 & m0); + } + b[1] = (b1 & ~m1) | (v1 & m1); +} + +void +ia64_patch_imm64 (u64 insn_addr, u64 val) +{ + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, + 0x01fffefe000UL, ( ((val & 0x8000000000000000UL) >> 27) /* bit 63 -> 36 */ + | ((val & 0x0000000000200000UL) << 0) /* bit 21 -> 21 */ + | ((val & 0x00000000001f0000UL) << 6) /* bit 16 -> 22 */ + | ((val & 0x000000000000ff80UL) << 20) /* bit 7 -> 27 */ + | ((val & 0x000000000000007fUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr + 1, 0x1ffffffffffUL, val >> 22); +} + +void +ia64_patch_imm60 (u64 insn_addr, u64 val) +{ + /* The assembler may generate offset pointing to either slot 1 + or slot 2 for a long (2-slot) instruction, occupying slots 1 + and 2. */ + insn_addr &= -16UL; + ia64_patch(insn_addr + 2, + 0x011ffffe000UL, ( ((val & 0x0800000000000000UL) >> 23) /* bit 59 -> 36 */ + | ((val & 0x00000000000fffffUL) << 13) /* bit 0 -> 13 */)); + ia64_patch(insn_addr + 1, 0x1fffffffffcUL, val >> 18); +} + +/* + * We need sometimes to load the physical address of a kernel + * object. Often we can convert the virtual address to physical + * at execution time, but sometimes (either for performance reasons + * or during error recovery) we cannot to this. Patch the marked + * bundles to load the physical address. + */ +void __init +ia64_patch_vtop (unsigned long start, unsigned long end) +{ + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + + /* replace virtual address with corresponding physical address: */ + ia64_patch_imm64(ip, ia64_tpa(get_imm64(ip))); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +/* + * Disable the RSE workaround by turning the conditional branch + * that we tagged in each place the workaround was used into an + * unconditional branch. + */ +void __init +ia64_patch_rse (unsigned long start, unsigned long end) +{ + s32 *offp = (s32 *) start; + u64 ip, *b; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + + b = (u64 *)(ip & -16); + b[1] &= ~0xf800000L; + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +ia64_patch_mckinley_e9 (unsigned long start, unsigned long end) +{ + static int first_time = 1; + int need_workaround; + s32 *offp = (s32 *) start; + u64 *wp; + + need_workaround = (local_cpu_data->family == 0x1f && local_cpu_data->model == 0); + + if (first_time) { + first_time = 0; + if (need_workaround) + printk(KERN_INFO "Leaving McKinley Errata 9 workaround enabled\n"); + } + if (need_workaround) + return; + + while (offp < (s32 *) end) { + wp = (u64 *) ia64_imva((char *) offp + *offp); + wp[0] = 0x0000000100000011UL; /* nop.m 0; nop.i 0; br.ret.sptk.many b6 */ + wp[1] = 0x0084006880000200UL; + wp[2] = 0x0000000100000000UL; /* nop.m 0; nop.i 0; nop.i 0 */ + wp[3] = 0x0004000000000200UL; + ia64_fc(wp); ia64_fc(wp + 2); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void __init +patch_fsyscall_table (unsigned long start, unsigned long end) +{ + extern unsigned long fsyscall_table[NR_syscalls]; + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) ia64_imva((char *) offp + *offp); + ia64_patch_imm64(ip, (u64) fsyscall_table); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +static void __init +patch_brl_fsys_bubble_down (unsigned long start, unsigned long end) +{ + extern char fsys_bubble_down[]; + s32 *offp = (s32 *) start; + u64 ip; + + while (offp < (s32 *) end) { + ip = (u64) offp + *offp; + ia64_patch_imm60((u64) ia64_imva((void *) ip), + (u64) (fsys_bubble_down - (ip & -16)) / 16); + ia64_fc((void *) ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} + +void __init +ia64_patch_gate (void) +{ +# define START(name) ((unsigned long) __start_gate_##name##_patchlist) +# define END(name) ((unsigned long)__end_gate_##name##_patchlist) + + patch_fsyscall_table(START(fsyscall), END(fsyscall)); + patch_brl_fsys_bubble_down(START(brl_fsys_bubble_down), END(brl_fsys_bubble_down)); + ia64_patch_vtop(START(vtop), END(vtop)); + ia64_patch_mckinley_e9(START(mckinley_e9), END(mckinley_e9)); +} + +void ia64_patch_phys_stack_reg(unsigned long val) +{ + s32 * offp = (s32 *) __start___phys_stack_reg_patchlist; + s32 * end = (s32 *) __end___phys_stack_reg_patchlist; + u64 ip, mask, imm; + + /* see instruction format A4: adds r1 = imm13, r3 */ + mask = (0x3fUL << 27) | (0x7f << 13); + imm = (((val >> 7) & 0x3f) << 27) | (val & 0x7f) << 13; + + while (offp < end) { + ip = (u64) offp + *offp; + ia64_patch(ip, mask, imm); + ia64_fc((void *)ip); + ++offp; + } + ia64_sync_i(); + ia64_srlz_i(); +} diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c new file mode 100644 index 000000000000..c90221733c6b --- /dev/null +++ b/arch/ia64/kernel/pci-dma.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Dynamic DMA mapping support. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +int no_iommu __read_mostly; +#ifdef CONFIG_IOMMU_DEBUG +int force_iommu __read_mostly = 1; +#else +int force_iommu __read_mostly; +#endif + +static int __init pci_iommu_init(void) +{ + if (iommu_detected) + intel_iommu_init(); + + return 0; +} + +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); diff --git a/arch/ia64/kernel/perfmon_itanium.h b/arch/ia64/kernel/perfmon_itanium.h new file mode 100644 index 000000000000..dbd04028aafa --- /dev/null +++ b/arch/ia64/kernel/perfmon_itanium.h @@ -0,0 +1,116 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This file contains the Itanium PMU register description tables + * and pmc checker. + * + * Copyright (C) 2002-2003 Hewlett Packard Co + * Stephane Eranian + */ +static int pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs); + +static pfm_reg_desc_t pfm_ita_pmc_desc[PMU_MAX_PMCS]={ +/* pmc0 */ { PFM_REG_CONTROL , 0, 0x1UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc1 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc2 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc3 */ { PFM_REG_CONTROL , 0, 0x0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc4 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(4),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc5 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(5),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc6 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(6),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc7 */ { PFM_REG_COUNTING, 6, 0x0UL, -1UL, NULL, NULL, {RDEP(7),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc8 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc9 */ { PFM_REG_CONFIG , 0, 0xf00000003ffffff8UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc10 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(0)|RDEP(1),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc11 */ { PFM_REG_MONITOR , 6, 0x0000000010000000UL, -1UL, NULL, pfm_ita_pmc_check, {RDEP(2)|RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc12 */ { PFM_REG_MONITOR , 6, 0x0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, +/* pmc13 */ { PFM_REG_CONFIG , 0, 0x0003ffff00000001UL, -1UL, NULL, pfm_ita_pmc_check, {0UL,0UL, 0UL, 0UL}, {0UL,0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0x0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static pfm_reg_desc_t pfm_ita_pmd_desc[PMU_MAX_PMDS]={ +/* pmd0 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(1),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd1 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(0),0UL, 0UL, 0UL}, {RDEP(10),0UL, 0UL, 0UL}}, +/* pmd2 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(3)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd3 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(17),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, +/* pmd4 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(4),0UL, 0UL, 0UL}}, +/* pmd5 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(5),0UL, 0UL, 0UL}}, +/* pmd6 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(6),0UL, 0UL, 0UL}}, +/* pmd7 */ { PFM_REG_COUNTING, 0, 0UL, -1UL, NULL, NULL, {0UL,0UL, 0UL, 0UL}, {RDEP(7),0UL, 0UL, 0UL}}, +/* pmd8 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd9 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd10 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd11 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd12 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(13)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd13 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(14)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd14 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(15)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd15 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(16),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd16 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(8)|RDEP(9)|RDEP(10)|RDEP(11)|RDEP(12)|RDEP(13)|RDEP(14)|RDEP(15),0UL, 0UL, 0UL}, {RDEP(12),0UL, 0UL, 0UL}}, +/* pmd17 */ { PFM_REG_BUFFER , 0, 0UL, -1UL, NULL, NULL, {RDEP(2)|RDEP(3),0UL, 0UL, 0UL}, {RDEP(11),0UL, 0UL, 0UL}}, + { PFM_REG_END , 0, 0UL, -1UL, NULL, NULL, {0,}, {0,}}, /* end marker */ +}; + +static int +pfm_ita_pmc_check(struct task_struct *task, pfm_context_t *ctx, unsigned int cnum, unsigned long *val, struct pt_regs *regs) +{ + int ret; + int is_loaded; + + /* sanitfy check */ + if (ctx == NULL) return -EINVAL; + + is_loaded = ctx->ctx_state == PFM_CTX_LOADED || ctx->ctx_state == PFM_CTX_MASKED; + + /* + * we must clear the (instruction) debug registers if pmc13.ta bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 13 && is_loaded && ((*val & 0x1) == 0UL) && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc13.ta cleared, clearing ibr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(1, ctx, NULL, 0, regs); + if (ret) return ret; + } + + /* + * we must clear the (data) debug registers if pmc11.pt bit is cleared + * before they are written (fl_using_dbreg==0) to avoid picking up stale information. + */ + if (cnum == 11 && is_loaded && ((*val >> 28)& 0x1) == 0 && ctx->ctx_fl_using_dbreg == 0) { + + DPRINT(("pmc[%d]=0x%lx has active pmc11.pt cleared, clearing dbr\n", cnum, *val)); + + /* don't mix debug with perfmon */ + if (task && (task->thread.flags & IA64_THREAD_DBG_VALID) != 0) return -EINVAL; + + /* + * a count of 0 will mark the debug registers as in use and also + * ensure that they are properly cleared. + */ + ret = pfm_write_ibr_dbr(0, ctx, NULL, 0, regs); + if (ret) return ret; + } + return 0; +} + +/* + * impl_pmcs, impl_pmds are computed at runtime to minimize errors! + */ +static pmu_config_t pmu_conf_ita={ + .pmu_name = "Itanium", + .pmu_family = 0x7, + .ovfl_val = (1UL << 32) - 1, + .pmd_desc = pfm_ita_pmd_desc, + .pmc_desc = pfm_ita_pmc_desc, + .num_ibrs = 8, + .num_dbrs = 8, + .use_rr_dbregs = 1, /* debug register are use for range retrictions */ +}; + + diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c new file mode 100644 index 000000000000..9a5cd9fad3a9 --- /dev/null +++ b/arch/ia64/kernel/process.c @@ -0,0 +1,611 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * 04/11/17 Ashok Raj Added CPU Hotplug Support + * + * 2005-10-07 Keith Owens + * Add notify_die() hooks. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" + +#include "sigframe.h" + +void (*ia64_mark_idle)(int); + +unsigned long boot_option_idle_override = IDLE_NO_OVERRIDE; +EXPORT_SYMBOL(boot_option_idle_override); +void (*pm_power_off) (void); +EXPORT_SYMBOL(pm_power_off); + +static void +ia64_do_show_stack (struct unw_frame_info *info, void *arg) +{ + unsigned long ip, sp, bsp; + const char *loglvl = arg; + + printk("%s\nCall Trace:\n", loglvl); + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + + unw_get_sp(info, &sp); + unw_get_bsp(info, &bsp); + printk("%s [<%016lx>] %pS\n" + " sp=%016lx bsp=%016lx\n", + loglvl, ip, (void *)ip, sp, bsp); + } while (unw_unwind(info) >= 0); +} + +void +show_stack (struct task_struct *task, unsigned long *sp, const char *loglvl) +{ + if (!task) + unw_init_running(ia64_do_show_stack, (void *)loglvl); + else { + struct unw_frame_info info; + + unw_init_from_blocked_task(&info, task); + ia64_do_show_stack(&info, (void *)loglvl); + } +} + +void +show_regs (struct pt_regs *regs) +{ + unsigned long ip = regs->cr_iip + ia64_psr(regs)->ri; + + print_modules(); + printk("\n"); + show_regs_print_info(KERN_DEFAULT); + printk("psr : %016lx ifs : %016lx ip : [<%016lx>] %s (%s)\n", + regs->cr_ipsr, regs->cr_ifs, ip, print_tainted(), + init_utsname()->release); + printk("ip is at %pS\n", (void *)ip); + printk("unat: %016lx pfs : %016lx rsc : %016lx\n", + regs->ar_unat, regs->ar_pfs, regs->ar_rsc); + printk("rnat: %016lx bsps: %016lx pr : %016lx\n", + regs->ar_rnat, regs->ar_bspstore, regs->pr); + printk("ldrs: %016lx ccv : %016lx fpsr: %016lx\n", + regs->loadrs, regs->ar_ccv, regs->ar_fpsr); + printk("csd : %016lx ssd : %016lx\n", regs->ar_csd, regs->ar_ssd); + printk("b0 : %016lx b6 : %016lx b7 : %016lx\n", regs->b0, regs->b6, regs->b7); + printk("f6 : %05lx%016lx f7 : %05lx%016lx\n", + regs->f6.u.bits[1], regs->f6.u.bits[0], + regs->f7.u.bits[1], regs->f7.u.bits[0]); + printk("f8 : %05lx%016lx f9 : %05lx%016lx\n", + regs->f8.u.bits[1], regs->f8.u.bits[0], + regs->f9.u.bits[1], regs->f9.u.bits[0]); + printk("f10 : %05lx%016lx f11 : %05lx%016lx\n", + regs->f10.u.bits[1], regs->f10.u.bits[0], + regs->f11.u.bits[1], regs->f11.u.bits[0]); + + printk("r1 : %016lx r2 : %016lx r3 : %016lx\n", regs->r1, regs->r2, regs->r3); + printk("r8 : %016lx r9 : %016lx r10 : %016lx\n", regs->r8, regs->r9, regs->r10); + printk("r11 : %016lx r12 : %016lx r13 : %016lx\n", regs->r11, regs->r12, regs->r13); + printk("r14 : %016lx r15 : %016lx r16 : %016lx\n", regs->r14, regs->r15, regs->r16); + printk("r17 : %016lx r18 : %016lx r19 : %016lx\n", regs->r17, regs->r18, regs->r19); + printk("r20 : %016lx r21 : %016lx r22 : %016lx\n", regs->r20, regs->r21, regs->r22); + printk("r23 : %016lx r24 : %016lx r25 : %016lx\n", regs->r23, regs->r24, regs->r25); + printk("r26 : %016lx r27 : %016lx r28 : %016lx\n", regs->r26, regs->r27, regs->r28); + printk("r29 : %016lx r30 : %016lx r31 : %016lx\n", regs->r29, regs->r30, regs->r31); + + if (user_mode(regs)) { + /* print the stacked registers */ + unsigned long val, *bsp, ndirty; + int i, sof, is_nat = 0; + + sof = regs->cr_ifs & 0x7f; /* size of frame */ + ndirty = (regs->loadrs >> 19); + bsp = ia64_rse_skip_regs((unsigned long *) regs->ar_bspstore, ndirty); + for (i = 0; i < sof; ++i) { + get_user(val, (unsigned long __user *) ia64_rse_skip_regs(bsp, i)); + printk("r%-3u:%c%016lx%s", 32 + i, is_nat ? '*' : ' ', val, + ((i == sof - 1) || (i % 3) == 2) ? "\n" : " "); + } + } else + show_stack(NULL, NULL, KERN_DEFAULT); +} + +/* local support for deprecated console_print */ +void +console_print(const char *s) +{ + printk(KERN_EMERG "%s", s); +} + +void +do_notify_resume_user(sigset_t *unused, struct sigscratch *scr, long in_syscall) +{ + if (fsys_mode(current, &scr->pt)) { + /* + * defer signal-handling etc. until we return to + * privilege-level 0. + */ + if (!ia64_psr(&scr->pt)->lp) + ia64_psr(&scr->pt)->lp = 1; + return; + } + + /* deal with pending signal delivery */ + if (test_thread_flag(TIF_SIGPENDING) || + test_thread_flag(TIF_NOTIFY_SIGNAL)) { + local_irq_enable(); /* force interrupt enable */ + ia64_do_signal(scr, in_syscall); + } + + if (test_thread_flag(TIF_NOTIFY_RESUME)) { + local_irq_enable(); /* force interrupt enable */ + resume_user_mode_work(&scr->pt); + } + + /* copy user rbs to kernel rbs */ + if (unlikely(test_thread_flag(TIF_RESTORE_RSE))) { + local_irq_enable(); /* force interrupt enable */ + ia64_sync_krbs(); + } + + local_irq_disable(); /* force interrupt disable */ +} + +static int __init nohalt_setup(char * str) +{ + cpu_idle_poll_ctrl(true); + return 1; +} +__setup("nohalt", nohalt_setup); + +#ifdef CONFIG_HOTPLUG_CPU +/* We don't actually take CPU down, just spin without interrupts. */ +static inline void __noreturn play_dead(void) +{ + unsigned int this_cpu = smp_processor_id(); + + /* Ack it */ + __this_cpu_write(cpu_state, CPU_DEAD); + + max_xtp(); + local_irq_disable(); + idle_task_exit(); + ia64_jump_to_sal(&sal_boot_rendez_state[this_cpu]); + /* + * The above is a point of no-return, the processor is + * expected to be in SAL loop now. + */ + BUG(); +} +#else +static inline void __noreturn play_dead(void) +{ + BUG(); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +void __noreturn arch_cpu_idle_dead(void) +{ + play_dead(); +} + +void arch_cpu_idle(void) +{ + void (*mark_idle)(int) = ia64_mark_idle; + +#ifdef CONFIG_SMP + min_xtp(); +#endif + rmb(); + if (mark_idle) + (*mark_idle)(1); + + raw_safe_halt(); + raw_local_irq_disable(); + + if (mark_idle) + (*mark_idle)(0); +#ifdef CONFIG_SMP + normal_xtp(); +#endif +} + +void +ia64_save_extra (struct task_struct *task) +{ + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) + ia64_save_debug_regs(&task->thread.dbr[0]); +} + +void +ia64_load_extra (struct task_struct *task) +{ + if ((task->thread.flags & IA64_THREAD_DBG_VALID) != 0) + ia64_load_debug_regs(&task->thread.dbr[0]); +} + +/* + * Copy the state of an ia-64 thread. + * + * We get here through the following call chain: + * + * from user-level: from kernel: + * + * + * sys_clone : + * kernel_clone kernel_clone + * copy_thread copy_thread + * + * This means that the stack layout is as follows: + * + * +---------------------+ (highest addr) + * | struct pt_regs | + * +---------------------+ + * | struct switch_stack | + * +---------------------+ + * | | + * | memory stack | + * | | <-- sp (lowest addr) + * +---------------------+ + * + * Observe that we copy the unat values that are in pt_regs and switch_stack. Spilling an + * integer to address X causes bit N in ar.unat to be set to the NaT bit of the register, + * with N=(X & 0x1ff)/8. Thus, copying the unat value preserves the NaT bits ONLY if the + * pt_regs structure in the parent is congruent to that of the child, modulo 512. Since + * the stack is page aligned and the page size is at least 4KB, this is always the case, + * so there is nothing to worry about. + */ +int +copy_thread(struct task_struct *p, const struct kernel_clone_args *args) +{ + unsigned long clone_flags = args->flags; + unsigned long user_stack_base = args->stack; + unsigned long user_stack_size = args->stack_size; + unsigned long tls = args->tls; + extern char ia64_ret_from_clone; + struct switch_stack *child_stack, *stack; + unsigned long rbs, child_rbs, rbs_size; + struct pt_regs *child_ptregs; + struct pt_regs *regs = current_pt_regs(); + int retval = 0; + + child_ptregs = (struct pt_regs *) ((unsigned long) p + IA64_STK_OFFSET) - 1; + child_stack = (struct switch_stack *) child_ptregs - 1; + + rbs = (unsigned long) current + IA64_RBS_OFFSET; + child_rbs = (unsigned long) p + IA64_RBS_OFFSET; + + /* copy parts of thread_struct: */ + p->thread.ksp = (unsigned long) child_stack - 16; + + /* + * NOTE: The calling convention considers all floating point + * registers in the high partition (fph) to be scratch. Since + * the only way to get to this point is through a system call, + * we know that the values in fph are all dead. Hence, there + * is no need to inherit the fph state from the parent to the + * child and all we have to do is to make sure that + * IA64_THREAD_FPH_VALID is cleared in the child. + * + * XXX We could push this optimization a bit further by + * clearing IA64_THREAD_FPH_VALID on ANY system call. + * However, it's not clear this is worth doing. Also, it + * would be a slight deviation from the normal Linux system + * call behavior where scratch registers are preserved across + * system calls (unless used by the system call itself). + */ +# define THREAD_FLAGS_TO_CLEAR (IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID \ + | IA64_THREAD_PM_VALID) +# define THREAD_FLAGS_TO_SET 0 + p->thread.flags = ((current->thread.flags & ~THREAD_FLAGS_TO_CLEAR) + | THREAD_FLAGS_TO_SET); + + ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */ + + if (unlikely(args->fn)) { + if (unlikely(args->idle)) { + /* fork_idle() called us */ + return 0; + } + memset(child_stack, 0, sizeof(*child_ptregs) + sizeof(*child_stack)); + child_stack->r4 = (unsigned long) args->fn; + child_stack->r5 = (unsigned long) args->fn_arg; + /* + * Preserve PSR bits, except for bits 32-34 and 37-45, + * which we can't read. + */ + child_ptregs->cr_ipsr = ia64_getreg(_IA64_REG_PSR) | IA64_PSR_BN; + /* mark as valid, empty frame */ + child_ptregs->cr_ifs = 1UL << 63; + child_stack->ar_fpsr = child_ptregs->ar_fpsr + = ia64_getreg(_IA64_REG_AR_FPSR); + child_stack->pr = (1 << PRED_KERNEL_STACK); + child_stack->ar_bspstore = child_rbs; + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR. + */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); + + return 0; + } + stack = ((struct switch_stack *) regs) - 1; + /* copy parent's switch_stack & pt_regs to child: */ + memcpy(child_stack, stack, sizeof(*child_ptregs) + sizeof(*child_stack)); + + /* copy the parent's register backing store to the child: */ + rbs_size = stack->ar_bspstore - rbs; + memcpy((void *) child_rbs, (void *) rbs, rbs_size); + if (clone_flags & CLONE_SETTLS) + child_ptregs->r13 = tls; + if (user_stack_base) { + child_ptregs->r12 = user_stack_base + user_stack_size - 16; + child_ptregs->ar_bspstore = user_stack_base; + child_ptregs->ar_rnat = 0; + child_ptregs->loadrs = 0; + } + child_stack->ar_bspstore = child_rbs + rbs_size; + child_stack->b0 = (unsigned long) &ia64_ret_from_clone; + + /* stop some PSR bits from being inherited. + * the psr.up/psr.pp bits must be cleared on fork but inherited on execve() + * therefore we must specify them explicitly here and not include them in + * IA64_PSR_BITS_TO_CLEAR. + */ + child_ptregs->cr_ipsr = ((child_ptregs->cr_ipsr | IA64_PSR_BITS_TO_SET) + & ~(IA64_PSR_BITS_TO_CLEAR | IA64_PSR_PP | IA64_PSR_UP)); + return retval; +} + +asmlinkage long ia64_clone(unsigned long clone_flags, unsigned long stack_start, + unsigned long stack_size, unsigned long parent_tidptr, + unsigned long child_tidptr, unsigned long tls) +{ + struct kernel_clone_args args = { + .flags = (lower_32_bits(clone_flags) & ~CSIGNAL), + .pidfd = (int __user *)parent_tidptr, + .child_tid = (int __user *)child_tidptr, + .parent_tid = (int __user *)parent_tidptr, + .exit_signal = (lower_32_bits(clone_flags) & CSIGNAL), + .stack = stack_start, + .stack_size = stack_size, + .tls = tls, + }; + + return kernel_clone(&args); +} + +static void +do_copy_task_regs (struct task_struct *task, struct unw_frame_info *info, void *arg) +{ + unsigned long mask, sp, nat_bits = 0, ar_rnat, urbs_end, cfm; + unsigned long ip; + elf_greg_t *dst = arg; + struct pt_regs *pt; + char nat; + int i; + + memset(dst, 0, sizeof(elf_gregset_t)); /* don't leak any kernel bits to user-level */ + + if (unw_unwind_to_user(info) < 0) + return; + + unw_get_sp(info, &sp); + pt = (struct pt_regs *) (sp + 16); + + urbs_end = ia64_get_user_rbs_end(task, pt, &cfm); + + if (ia64_sync_user_rbs(task, info->sw, pt->ar_bspstore, urbs_end) < 0) + return; + + ia64_peek(task, info->sw, urbs_end, (long) ia64_rse_rnat_addr((long *) urbs_end), + &ar_rnat); + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + + /* r0 is zero */ + for (i = 1, mask = (1UL << i); i < 32; ++i) { + unw_get_gr(info, i, &dst[i], &nat); + if (nat) + nat_bits |= mask; + mask <<= 1; + } + dst[32] = nat_bits; + unw_get_pr(info, &dst[33]); + + for (i = 0; i < 8; ++i) + unw_get_br(info, i, &dst[34 + i]); + + unw_get_rp(info, &ip); + dst[42] = ip + ia64_psr(pt)->ri; + dst[43] = cfm; + dst[44] = pt->cr_ipsr & IA64_PSR_UM; + + unw_get_ar(info, UNW_AR_RSC, &dst[45]); + /* + * For bsp and bspstore, unw_get_ar() would return the kernel + * addresses, but we need the user-level addresses instead: + */ + dst[46] = urbs_end; /* note: by convention PT_AR_BSP points to the end of the urbs! */ + dst[47] = pt->ar_bspstore; + dst[48] = ar_rnat; + unw_get_ar(info, UNW_AR_CCV, &dst[49]); + unw_get_ar(info, UNW_AR_UNAT, &dst[50]); + unw_get_ar(info, UNW_AR_FPSR, &dst[51]); + dst[52] = pt->ar_pfs; /* UNW_AR_PFS is == to pt->cr_ifs for interrupt frames */ + unw_get_ar(info, UNW_AR_LC, &dst[53]); + unw_get_ar(info, UNW_AR_EC, &dst[54]); + unw_get_ar(info, UNW_AR_CSD, &dst[55]); + unw_get_ar(info, UNW_AR_SSD, &dst[56]); +} + +static void +do_copy_regs (struct unw_frame_info *info, void *arg) +{ + do_copy_task_regs(current, info, arg); +} + +void +ia64_elf_core_copy_regs (struct pt_regs *pt, elf_gregset_t dst) +{ + unw_init_running(do_copy_regs, dst); +} + +/* + * Flush thread state. This is called when a thread does an execve(). + */ +void +flush_thread (void) +{ + /* drop floating-point and debug-register state if it exists: */ + current->thread.flags &= ~(IA64_THREAD_FPH_VALID | IA64_THREAD_DBG_VALID); + ia64_drop_fpu(current); +} + +/* + * Clean up state associated with a thread. This is called when + * the thread calls exit(). + */ +void +exit_thread (struct task_struct *tsk) +{ + + ia64_drop_fpu(tsk); +} + +unsigned long +__get_wchan (struct task_struct *p) +{ + struct unw_frame_info info; + unsigned long ip; + int count = 0; + + /* + * Note: p may not be a blocked task (it could be current or + * another process running on some other CPU. Rather than + * trying to determine if p is really blocked, we just assume + * it's blocked and rely on the unwind routines to fail + * gracefully if the process wasn't really blocked after all. + * --davidm 99/12/15 + */ + unw_init_from_blocked_task(&info, p); + do { + if (task_is_running(p)) + return 0; + if (unw_unwind(&info) < 0) + return 0; + unw_get_ip(&info, &ip); + if (!in_sched_functions(ip)) + return ip; + } while (count++ < 16); + return 0; +} + +void +cpu_halt (void) +{ + pal_power_mgmt_info_u_t power_info[8]; + unsigned long min_power; + int i, min_power_state; + + if (ia64_pal_halt_info(power_info) != 0) + return; + + min_power_state = 0; + min_power = power_info[0].pal_power_mgmt_info_s.power_consumption; + for (i = 1; i < 8; ++i) + if (power_info[i].pal_power_mgmt_info_s.im + && power_info[i].pal_power_mgmt_info_s.power_consumption < min_power) { + min_power = power_info[i].pal_power_mgmt_info_s.power_consumption; + min_power_state = i; + } + + while (1) + ia64_pal_halt(min_power_state); +} + +void machine_shutdown(void) +{ + smp_shutdown_nonboot_cpus(reboot_cpu); + +#ifdef CONFIG_KEXEC + kexec_disable_iosapic(); +#endif +} + +void +machine_restart (char *restart_cmd) +{ + (void) notify_die(DIE_MACHINE_RESTART, restart_cmd, NULL, 0, 0, 0); + efi_reboot(REBOOT_WARM, NULL); +} + +void +machine_halt (void) +{ + (void) notify_die(DIE_MACHINE_HALT, "", NULL, 0, 0, 0); + cpu_halt(); +} + +void +machine_power_off (void) +{ + do_kernel_power_off(); + machine_halt(); +} + +EXPORT_SYMBOL(ia64_delay_loop); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c new file mode 100644 index 000000000000..4c41912c550f --- /dev/null +++ b/arch/ia64/kernel/ptrace.c @@ -0,0 +1,2012 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kernel support for the ptrace() and syscall tracing interfaces. + * + * Copyright (C) 1999-2005 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2006 Intel Co + * 2006-08-12 - IA64 Native Utrace implementation support added by + * Anil S Keshavamurthy + * + * Derived from the x86 and Alpha versions. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "entry.h" + +/* + * Bits in the PSR that we allow ptrace() to change: + * be, up, ac, mfl, mfh (the user mask; five bits total) + * db (debug breakpoint fault; one bit) + * id (instruction debug fault disable; one bit) + * dd (data debug fault disable; one bit) + * ri (restart instruction; two bits) + * is (instruction set; one bit) + */ +#define IPSR_MASK (IA64_PSR_UM | IA64_PSR_DB | IA64_PSR_IS \ + | IA64_PSR_ID | IA64_PSR_DD | IA64_PSR_RI) + +#define MASK(nbits) ((1UL << (nbits)) - 1) /* mask with NBITS bits set */ +#define PFM_MASK MASK(38) + +#define PTRACE_DEBUG 0 + +#if PTRACE_DEBUG +# define dprintk(format...) printk(format) +# define inline +#else +# define dprintk(format...) +#endif + +/* Return TRUE if PT was created due to kernel-entry via a system-call. */ + +static inline int +in_syscall (struct pt_regs *pt) +{ + return (long) pt->cr_ifs >= 0; +} + +/* + * Collect the NaT bits for r1-r31 from scratch_unat and return a NaT + * bitset where bit i is set iff the NaT bit of register i is set. + */ +unsigned long +ia64_get_scratch_nat_bits (struct pt_regs *pt, unsigned long scratch_unat) +{ +# define GET_BITS(first, last, unat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&pt->r##first); \ + unsigned long nbits = (last - first + 1); \ + unsigned long mask = MASK(nbits) << first; \ + unsigned long dist; \ + if (bit < first) \ + dist = 64 + bit - first; \ + else \ + dist = bit - first; \ + ia64_rotr(unat, dist) & mask; \ + }) + unsigned long val; + + /* + * Registers that are stored consecutively in struct pt_regs + * can be handled in parallel. If the register order in + * struct_pt_regs changes, this code MUST be updated. + */ + val = GET_BITS( 1, 1, scratch_unat); + val |= GET_BITS( 2, 3, scratch_unat); + val |= GET_BITS(12, 13, scratch_unat); + val |= GET_BITS(14, 14, scratch_unat); + val |= GET_BITS(15, 15, scratch_unat); + val |= GET_BITS( 8, 11, scratch_unat); + val |= GET_BITS(16, 31, scratch_unat); + return val; + +# undef GET_BITS +} + +/* + * Set the NaT bits for the scratch registers according to NAT and + * return the resulting unat (assuming the scratch registers are + * stored in PT). + */ +unsigned long +ia64_put_scratch_nat_bits (struct pt_regs *pt, unsigned long nat) +{ +# define PUT_BITS(first, last, nat) \ + ({ \ + unsigned long bit = ia64_unat_pos(&pt->r##first); \ + unsigned long nbits = (last - first + 1); \ + unsigned long mask = MASK(nbits) << first; \ + long dist; \ + if (bit < first) \ + dist = 64 + bit - first; \ + else \ + dist = bit - first; \ + ia64_rotl(nat & mask, dist); \ + }) + unsigned long scratch_unat; + + /* + * Registers that are stored consecutively in struct pt_regs + * can be handled in parallel. If the register order in + * struct_pt_regs changes, this code MUST be updated. + */ + scratch_unat = PUT_BITS( 1, 1, nat); + scratch_unat |= PUT_BITS( 2, 3, nat); + scratch_unat |= PUT_BITS(12, 13, nat); + scratch_unat |= PUT_BITS(14, 14, nat); + scratch_unat |= PUT_BITS(15, 15, nat); + scratch_unat |= PUT_BITS( 8, 11, nat); + scratch_unat |= PUT_BITS(16, 31, nat); + + return scratch_unat; + +# undef PUT_BITS +} + +#define IA64_MLX_TEMPLATE 0x2 +#define IA64_MOVL_OPCODE 6 + +void +ia64_increment_ip (struct pt_regs *regs) +{ + unsigned long w0, ri = ia64_psr(regs)->ri + 1; + + if (ri > 2) { + ri = 0; + regs->cr_iip += 16; + } else if (ri == 2) { + get_user(w0, (char __user *) regs->cr_iip + 0); + if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { + /* + * rfi'ing to slot 2 of an MLX bundle causes + * an illegal operation fault. We don't want + * that to happen... + */ + ri = 0; + regs->cr_iip += 16; + } + } + ia64_psr(regs)->ri = ri; +} + +void +ia64_decrement_ip (struct pt_regs *regs) +{ + unsigned long w0, ri = ia64_psr(regs)->ri - 1; + + if (ia64_psr(regs)->ri == 0) { + regs->cr_iip -= 16; + ri = 2; + get_user(w0, (char __user *) regs->cr_iip + 0); + if (((w0 >> 1) & 0xf) == IA64_MLX_TEMPLATE) { + /* + * rfi'ing to slot 2 of an MLX bundle causes + * an illegal operation fault. We don't want + * that to happen... + */ + ri = 1; + } + } + ia64_psr(regs)->ri = ri; +} + +/* + * This routine is used to read an rnat bits that are stored on the + * kernel backing store. Since, in general, the alignment of the user + * and kernel are different, this is not completely trivial. In + * essence, we need to construct the user RNAT based on up to two + * kernel RNAT values and/or the RNAT value saved in the child's + * pt_regs. + * + * user rbs + * + * +--------+ <-- lowest address + * | slot62 | + * +--------+ + * | rnat | 0x....1f8 + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_regs->ar_rnat + * +--------+ | + * | slot02 | / kernel rbs + * +--------+ +--------+ + * <- child_regs->ar_bspstore | slot61 | <-- krbs + * +- - - - + +--------+ + * | slot62 | + * +- - - - + +--------+ + * | rnat | + * +- - - - + +--------+ + * vrnat | slot00 | + * +- - - - + +--------+ + * = = + * +--------+ + * | slot00 | \ + * +--------+ | + * | slot01 | > child_stack->ar_rnat + * +--------+ | + * | slot02 | / + * +--------+ + * <--- child_stack->ar_bspstore + * + * The way to think of this code is as follows: bit 0 in the user rnat + * corresponds to some bit N (0 <= N <= 62) in one of the kernel rnat + * value. The kernel rnat value holding this bit is stored in + * variable rnat0. rnat1 is loaded with the kernel rnat value that + * form the upper bits of the user rnat value. + * + * Boundary cases: + * + * o when reading the rnat "below" the first rnat slot on the kernel + * backing store, rnat0/rnat1 are set to 0 and the low order bits are + * merged in from pt->ar_rnat. + * + * o when reading the rnat "above" the last rnat slot on the kernel + * backing store, rnat0/rnat1 gets its value from sw->ar_rnat. + */ +static unsigned long +get_rnat (struct task_struct *task, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, + unsigned long *urbs_end) +{ + unsigned long rnat0 = 0, rnat1 = 0, urnat = 0, *slot0_kaddr; + unsigned long umask = 0, mask, m; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs, nbits; + struct pt_regs *pt; + + pt = task_pt_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + + if (urbs_end < urnat_addr) + nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_end); + else + nbits = 63; + mask = MASK(nbits); + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be merged in from pt->ar_rnat */ + umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; + urnat = (pt->ar_rnat & umask); + mask &= ~umask; + if (!mask) + return urnat; + } + + m = mask << shift; + if (rnat0_kaddr >= kbsp) + rnat0 = sw->ar_rnat; + else if (rnat0_kaddr > krbs) + rnat0 = *rnat0_kaddr; + urnat |= (rnat0 & m) >> shift; + + m = mask >> (63 - shift); + if (rnat1_kaddr >= kbsp) + rnat1 = sw->ar_rnat; + else if (rnat1_kaddr > krbs) + rnat1 = *rnat1_kaddr; + urnat |= (rnat1 & m) << (63 - shift); + return urnat; +} + +/* + * The reverse of get_rnat. + */ +static void +put_rnat (struct task_struct *task, struct switch_stack *sw, + unsigned long *krbs, unsigned long *urnat_addr, unsigned long urnat, + unsigned long *urbs_end) +{ + unsigned long rnat0 = 0, rnat1 = 0, *slot0_kaddr, umask = 0, mask, m; + unsigned long *kbsp, *ubspstore, *rnat0_kaddr, *rnat1_kaddr, shift; + long num_regs, nbits; + struct pt_regs *pt; + unsigned long cfm, *urbs_kargs; + + pt = task_pt_regs(task); + kbsp = (unsigned long *) sw->ar_bspstore; + ubspstore = (unsigned long *) pt->ar_bspstore; + + urbs_kargs = urbs_end; + if (in_syscall(pt)) { + /* + * If entered via syscall, don't allow user to set rnat bits + * for syscall args. + */ + cfm = pt->cr_ifs; + urbs_kargs = ia64_rse_skip_regs(urbs_end, -(cfm & 0x7f)); + } + + if (urbs_kargs >= urnat_addr) + nbits = 63; + else { + if ((urnat_addr - 63) >= urbs_kargs) + return; + nbits = ia64_rse_num_regs(urnat_addr - 63, urbs_kargs); + } + mask = MASK(nbits); + + /* + * First, figure out which bit number slot 0 in user-land maps + * to in the kernel rnat. Do this by figuring out how many + * register slots we're beyond the user's backingstore and + * then computing the equivalent address in kernel space. + */ + num_regs = ia64_rse_num_regs(ubspstore, urnat_addr + 1); + slot0_kaddr = ia64_rse_skip_regs(krbs, num_regs); + shift = ia64_rse_slot_num(slot0_kaddr); + rnat1_kaddr = ia64_rse_rnat_addr(slot0_kaddr); + rnat0_kaddr = rnat1_kaddr - 64; + + if (ubspstore + 63 > urnat_addr) { + /* some bits need to be place in pt->ar_rnat: */ + umask = MASK(ia64_rse_slot_num(ubspstore)) & mask; + pt->ar_rnat = (pt->ar_rnat & ~umask) | (urnat & umask); + mask &= ~umask; + if (!mask) + return; + } + /* + * Note: Section 11.1 of the EAS guarantees that bit 63 of an + * rnat slot is ignored. so we don't have to clear it here. + */ + rnat0 = (urnat << shift); + m = mask << shift; + if (rnat0_kaddr >= kbsp) + sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat0 & m); + else if (rnat0_kaddr > krbs) + *rnat0_kaddr = ((*rnat0_kaddr & ~m) | (rnat0 & m)); + + rnat1 = (urnat >> (63 - shift)); + m = mask >> (63 - shift); + if (rnat1_kaddr >= kbsp) + sw->ar_rnat = (sw->ar_rnat & ~m) | (rnat1 & m); + else if (rnat1_kaddr > krbs) + *rnat1_kaddr = ((*rnat1_kaddr & ~m) | (rnat1 & m)); +} + +static inline int +on_kernel_rbs (unsigned long addr, unsigned long bspstore, + unsigned long urbs_end) +{ + unsigned long *rnat_addr = ia64_rse_rnat_addr((unsigned long *) + urbs_end); + return (addr >= bspstore && addr <= (unsigned long) rnat_addr); +} + +/* + * Read a word from the user-level backing store of task CHILD. ADDR + * is the user-level address to read the word from, VAL a pointer to + * the return value, and USER_BSP gives the end of the user-level + * backing store (i.e., it's the address that would be in ar.bsp after + * the user executed a "cover" instruction). + * + * This routine takes care of accessing the kernel register backing + * store for those registers that got spilled there. It also takes + * care of calculating the appropriate RNaT collection words. + */ +long +ia64_peek (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long *val) +{ + unsigned long *bspstore, *krbs, regnum, *laddr, *urbs_end, *rnat_addr; + struct pt_regs *child_regs; + size_t copied; + long ret; + + urbs_end = (long *) user_rbs_end; + laddr = (unsigned long *) addr; + child_regs = task_pt_regs(child); + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + if (on_kernel_rbs(addr, (unsigned long) bspstore, + (unsigned long) urbs_end)) + { + /* + * Attempt to read the RBS in an area that's actually + * on the kernel RBS => read the corresponding bits in + * the kernel RBS. + */ + rnat_addr = ia64_rse_rnat_addr(laddr); + ret = get_rnat(child, child_stack, krbs, rnat_addr, urbs_end); + + if (laddr == rnat_addr) { + /* return NaT collection word itself */ + *val = ret; + return 0; + } + + if (((1UL << ia64_rse_slot_num(laddr)) & ret) != 0) { + /* + * It is implementation dependent whether the + * data portion of a NaT value gets saved on a + * st8.spill or RSE spill (e.g., see EAS 2.6, + * 4.4.4.6 Register Spill and Fill). To get + * consistent behavior across all possible + * IA-64 implementations, we return zero in + * this case. + */ + *val = 0; + return 0; + } + + if (laddr < urbs_end) { + /* + * The desired word is on the kernel RBS and + * is not a NaT. + */ + regnum = ia64_rse_num_regs(bspstore, laddr); + *val = *ia64_rse_skip_regs(krbs, regnum); + return 0; + } + } + copied = access_process_vm(child, addr, &ret, sizeof(ret), FOLL_FORCE); + if (copied != sizeof(ret)) + return -EIO; + *val = ret; + return 0; +} + +long +ia64_poke (struct task_struct *child, struct switch_stack *child_stack, + unsigned long user_rbs_end, unsigned long addr, long val) +{ + unsigned long *bspstore, *krbs, regnum, *laddr; + unsigned long *urbs_end = (long *) user_rbs_end; + struct pt_regs *child_regs; + + laddr = (unsigned long *) addr; + child_regs = task_pt_regs(child); + bspstore = (unsigned long *) child_regs->ar_bspstore; + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + if (on_kernel_rbs(addr, (unsigned long) bspstore, + (unsigned long) urbs_end)) + { + /* + * Attempt to write the RBS in an area that's actually + * on the kernel RBS => write the corresponding bits + * in the kernel RBS. + */ + if (ia64_rse_is_rnat_slot(laddr)) + put_rnat(child, child_stack, krbs, laddr, val, + urbs_end); + else { + if (laddr < urbs_end) { + regnum = ia64_rse_num_regs(bspstore, laddr); + *ia64_rse_skip_regs(krbs, regnum) = val; + } + } + } else if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE) + != sizeof(val)) + return -EIO; + return 0; +} + +/* + * Calculate the address of the end of the user-level register backing + * store. This is the address that would have been stored in ar.bsp + * if the user had executed a "cover" instruction right before + * entering the kernel. If CFMP is not NULL, it is used to return the + * "current frame mask" that was active at the time the kernel was + * entered. + */ +unsigned long +ia64_get_user_rbs_end (struct task_struct *child, struct pt_regs *pt, + unsigned long *cfmp) +{ + unsigned long *krbs, *bspstore, cfm = pt->cr_ifs; + long ndirty; + + krbs = (unsigned long *) child + IA64_RBS_OFFSET/8; + bspstore = (unsigned long *) pt->ar_bspstore; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + if (in_syscall(pt)) + ndirty += (cfm & 0x7f); + else + cfm &= ~(1UL << 63); /* clear valid bit */ + + if (cfmp) + *cfmp = cfm; + return (unsigned long) ia64_rse_skip_regs(bspstore, ndirty); +} + +/* + * Synchronize (i.e, write) the RSE backing store living in kernel + * space to the VM of the CHILD task. SW and PT are the pointers to + * the switch_stack and pt_regs structures, respectively. + * USER_RBS_END is the user-level address at which the backing store + * ends. + */ +long +ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) +{ + unsigned long addr, val; + long ret; + + /* now copy word for word from kernel rbs to user rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + ret = ia64_peek(child, sw, user_rbs_end, addr, &val); + if (ret < 0) + return ret; + if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE) + != sizeof(val)) + return -EIO; + } + return 0; +} + +static long +ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw, + unsigned long user_rbs_start, unsigned long user_rbs_end) +{ + unsigned long addr, val; + long ret; + + /* now copy word for word from user rbs to kernel rbs: */ + for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { + if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE) + != sizeof(val)) + return -EIO; + + ret = ia64_poke(child, sw, user_rbs_end, addr, val); + if (ret < 0) + return ret; + } + return 0; +} + +typedef long (*syncfunc_t)(struct task_struct *, struct switch_stack *, + unsigned long, unsigned long); + +static void do_sync_rbs(struct unw_frame_info *info, void *arg) +{ + struct pt_regs *pt; + unsigned long urbs_end; + syncfunc_t fn = arg; + + if (unw_unwind_to_user(info) < 0) + return; + pt = task_pt_regs(info->task); + urbs_end = ia64_get_user_rbs_end(info->task, pt, NULL); + + fn(info->task, info->sw, pt->ar_bspstore, urbs_end); +} + +/* + * when a thread is stopped (ptraced), debugger might change thread's user + * stack (change memory directly), and we must avoid the RSE stored in kernel + * to override user stack (user space's RSE is newer than kernel's in the + * case). To workaround the issue, we copy kernel RSE to user RSE before the + * task is stopped, so user RSE has updated data. we then copy user RSE to + * kernel after the task is resummed from traced stop and kernel will use the + * newer RSE to return to user. TIF_RESTORE_RSE is the flag to indicate we need + * synchronize user RSE to kernel. + */ +void ia64_ptrace_stop(void) +{ + if (test_and_set_tsk_thread_flag(current, TIF_RESTORE_RSE)) + return; + set_notify_resume(current); + unw_init_running(do_sync_rbs, ia64_sync_user_rbs); +} + +/* + * This is called to read back the register backing store. + */ +void ia64_sync_krbs(void) +{ + clear_tsk_thread_flag(current, TIF_RESTORE_RSE); + + unw_init_running(do_sync_rbs, ia64_sync_kernel_rbs); +} + +/* + * Write f32-f127 back to task->thread.fph if it has been modified. + */ +inline void +ia64_flush_fph (struct task_struct *task) +{ + struct ia64_psr *psr = ia64_psr(task_pt_regs(task)); + + /* + * Prevent migrating this task while + * we're fiddling with the FPU state + */ + preempt_disable(); + if (ia64_is_local_fpu_owner(task) && psr->mfh) { + psr->mfh = 0; + task->thread.flags |= IA64_THREAD_FPH_VALID; + ia64_save_fpu(&task->thread.fph[0]); + } + preempt_enable(); +} + +/* + * Sync the fph state of the task so that it can be manipulated + * through thread.fph. If necessary, f32-f127 are written back to + * thread.fph or, if the fph state hasn't been used before, thread.fph + * is cleared to zeroes. Also, access to f32-f127 is disabled to + * ensure that the task picks up the state from thread.fph when it + * executes again. + */ +void +ia64_sync_fph (struct task_struct *task) +{ + struct ia64_psr *psr = ia64_psr(task_pt_regs(task)); + + ia64_flush_fph(task); + if (!(task->thread.flags & IA64_THREAD_FPH_VALID)) { + task->thread.flags |= IA64_THREAD_FPH_VALID; + memset(&task->thread.fph, 0, sizeof(task->thread.fph)); + } + ia64_drop_fpu(task); + psr->dfh = 1; +} + +/* + * Change the machine-state of CHILD such that it will return via the normal + * kernel exit-path, rather than the syscall-exit path. + */ +static void +convert_to_non_syscall (struct task_struct *child, struct pt_regs *pt, + unsigned long cfm) +{ + struct unw_frame_info info, prev_info; + unsigned long ip, sp, pr; + + unw_init_from_blocked_task(&info, child); + while (1) { + prev_info = info; + if (unw_unwind(&info) < 0) + return; + + unw_get_sp(&info, &sp); + if ((long)((unsigned long)child + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + dprintk("ptrace.%s: ran off the top of the kernel " + "stack\n", __func__); + return; + } + if (unw_get_pr (&prev_info, &pr) < 0) { + unw_get_rp(&prev_info, &ip); + dprintk("ptrace.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __func__, ip); + return; + } + if (unw_is_intr_frame(&info) + && (pr & (1UL << PRED_USER_STACK))) + break; + } + + /* + * Note: at the time of this call, the target task is blocked + * in notify_resume_user() and by clearling PRED_LEAVE_SYSCALL + * (aka, "pLvSys") we redirect execution from + * .work_pending_syscall_end to .work_processed_kernel. + */ + unw_get_pr(&prev_info, &pr); + pr &= ~((1UL << PRED_SYSCALL) | (1UL << PRED_LEAVE_SYSCALL)); + pr |= (1UL << PRED_NON_SYSCALL); + unw_set_pr(&prev_info, pr); + + pt->cr_ifs = (1UL << 63) | cfm; + /* + * Clear the memory that is NOT written on syscall-entry to + * ensure we do not leak kernel-state to user when execution + * resumes. + */ + pt->r2 = 0; + pt->r3 = 0; + pt->r14 = 0; + memset(&pt->r16, 0, 16*8); /* clear r16-r31 */ + memset(&pt->f6, 0, 6*16); /* clear f6-f11 */ + pt->b7 = 0; + pt->ar_ccv = 0; + pt->ar_csd = 0; + pt->ar_ssd = 0; +} + +static int +access_nat_bits (struct task_struct *child, struct pt_regs *pt, + struct unw_frame_info *info, + unsigned long *data, int write_access) +{ + unsigned long regnum, nat_bits, scratch_unat, dummy = 0; + char nat = 0; + + if (write_access) { + nat_bits = *data; + scratch_unat = ia64_put_scratch_nat_bits(pt, nat_bits); + if (unw_set_ar(info, UNW_AR_UNAT, scratch_unat) < 0) { + dprintk("ptrace: failed to set ar.unat\n"); + return -1; + } + for (regnum = 4; regnum <= 7; ++regnum) { + unw_get_gr(info, regnum, &dummy, &nat); + unw_set_gr(info, regnum, dummy, + (nat_bits >> regnum) & 1); + } + } else { + if (unw_get_ar(info, UNW_AR_UNAT, &scratch_unat) < 0) { + dprintk("ptrace: failed to read ar.unat\n"); + return -1; + } + nat_bits = ia64_get_scratch_nat_bits(pt, scratch_unat); + for (regnum = 4; regnum <= 7; ++regnum) { + unw_get_gr(info, regnum, &dummy, &nat); + nat_bits |= (nat != 0) << regnum; + } + *data = nat_bits; + } + return 0; +} + +static int +access_elf_reg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access); + +static long +ptrace_getregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) +{ + unsigned long psr, ec, lc, rnat, bsp, cfm, nat_bits, val; + struct unw_frame_info info; + struct ia64_fpreg fpval; + struct switch_stack *sw; + struct pt_regs *pt; + long ret, retval = 0; + char nat = 0; + int i; + + if (!access_ok(ppr, sizeof(struct pt_all_user_regs))) + return -EIO; + + pt = task_pt_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) { + return -EIO; + } + + if (((unsigned long) ppr & 0x7) != 0) { + dprintk("ptrace:unaligned register address %p\n", ppr); + return -EIO; + } + + if (access_elf_reg(child, &info, ELF_CR_IPSR_OFFSET, &psr, 0) < 0 || + access_elf_reg(child, &info, ELF_AR_EC_OFFSET, &ec, 0) < 0 || + access_elf_reg(child, &info, ELF_AR_LC_OFFSET, &lc, 0) < 0 || + access_elf_reg(child, &info, ELF_AR_RNAT_OFFSET, &rnat, 0) < 0 || + access_elf_reg(child, &info, ELF_AR_BSP_OFFSET, &bsp, 0) < 0 || + access_elf_reg(child, &info, ELF_CFM_OFFSET, &cfm, 0) < 0 || + access_elf_reg(child, &info, ELF_NAT_OFFSET, &nat_bits, 0) < 0) + return -EIO; + + /* control regs */ + + retval |= __put_user(pt->cr_iip, &ppr->cr_iip); + retval |= __put_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __put_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); + retval |= __put_user(pt->ar_rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __put_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __put_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __put_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __put_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + + retval |= __put_user(ec, &ppr->ar[PT_AUR_EC]); + retval |= __put_user(lc, &ppr->ar[PT_AUR_LC]); + retval |= __put_user(rnat, &ppr->ar[PT_AUR_RNAT]); + retval |= __put_user(bsp, &ppr->ar[PT_AUR_BSP]); + retval |= __put_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + + retval |= __copy_to_user(&ppr->gr[1], &pt->r1, sizeof(long)); + retval |= __copy_to_user(&ppr->gr[2], &pt->r2, sizeof(long) *2); + + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { + if (unw_access_gr(&info, i, &val, &nat, 0) < 0) + return -EIO; + retval |= __put_user(val, &ppr->gr[i]); + } + + /* gr8-gr11 */ + + retval |= __copy_to_user(&ppr->gr[8], &pt->r8, sizeof(long) * 4); + + /* gr12-gr15 */ + + retval |= __copy_to_user(&ppr->gr[12], &pt->r12, sizeof(long) * 2); + retval |= __copy_to_user(&ppr->gr[14], &pt->r14, sizeof(long)); + retval |= __copy_to_user(&ppr->gr[15], &pt->r15, sizeof(long)); + + /* gr16-gr31 */ + + retval |= __copy_to_user(&ppr->gr[16], &pt->r16, sizeof(long) * 16); + + /* b0 */ + + retval |= __put_user(pt->b0, &ppr->br[0]); + + /* b1-b5 */ + + for (i = 1; i < 6; i++) { + if (unw_access_br(&info, i, &val, 0) < 0) + return -EIO; + __put_user(val, &ppr->br[i]); + } + + /* b6-b7 */ + + retval |= __put_user(pt->b6, &ppr->br[6]); + retval |= __put_user(pt->b7, &ppr->br[7]); + + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { + if (unw_get_fr(&info, i, &fpval) < 0) + return -EIO; + retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fr6-fr11 */ + + retval |= __copy_to_user(&ppr->fr[6], &pt->f6, + sizeof(struct ia64_fpreg) * 6); + + /* fp scratch regs(12-15) */ + + retval |= __copy_to_user(&ppr->fr[12], &sw->f12, + sizeof(struct ia64_fpreg) * 4); + + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { + if (unw_get_fr(&info, i, &fpval) < 0) + return -EIO; + retval |= __copy_to_user(&ppr->fr[i], &fpval, sizeof (fpval)); + } + + /* fph */ + + ia64_flush_fph(child); + retval |= __copy_to_user(&ppr->fr[32], &child->thread.fph, + sizeof(ppr->fr[32]) * 96); + + /* preds */ + + retval |= __put_user(pt->pr, &ppr->pr); + + /* nat bits */ + + retval |= __put_user(nat_bits, &ppr->nat); + + ret = retval ? -EIO : 0; + return ret; +} + +static long +ptrace_setregs (struct task_struct *child, struct pt_all_user_regs __user *ppr) +{ + unsigned long psr, rsc, ec, lc, rnat, bsp, cfm, nat_bits, val = 0; + struct unw_frame_info info; + struct switch_stack *sw; + struct ia64_fpreg fpval; + struct pt_regs *pt; + long retval = 0; + int i; + + memset(&fpval, 0, sizeof(fpval)); + + if (!access_ok(ppr, sizeof(struct pt_all_user_regs))) + return -EIO; + + pt = task_pt_regs(child); + sw = (struct switch_stack *) (child->thread.ksp + 16); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) { + return -EIO; + } + + if (((unsigned long) ppr & 0x7) != 0) { + dprintk("ptrace:unaligned register address %p\n", ppr); + return -EIO; + } + + /* control regs */ + + retval |= __get_user(pt->cr_iip, &ppr->cr_iip); + retval |= __get_user(psr, &ppr->cr_ipsr); + + /* app regs */ + + retval |= __get_user(pt->ar_pfs, &ppr->ar[PT_AUR_PFS]); + retval |= __get_user(rsc, &ppr->ar[PT_AUR_RSC]); + retval |= __get_user(pt->ar_bspstore, &ppr->ar[PT_AUR_BSPSTORE]); + retval |= __get_user(pt->ar_unat, &ppr->ar[PT_AUR_UNAT]); + retval |= __get_user(pt->ar_ccv, &ppr->ar[PT_AUR_CCV]); + retval |= __get_user(pt->ar_fpsr, &ppr->ar[PT_AUR_FPSR]); + + retval |= __get_user(ec, &ppr->ar[PT_AUR_EC]); + retval |= __get_user(lc, &ppr->ar[PT_AUR_LC]); + retval |= __get_user(rnat, &ppr->ar[PT_AUR_RNAT]); + retval |= __get_user(bsp, &ppr->ar[PT_AUR_BSP]); + retval |= __get_user(cfm, &ppr->cfm); + + /* gr1-gr3 */ + + retval |= __copy_from_user(&pt->r1, &ppr->gr[1], sizeof(long)); + retval |= __copy_from_user(&pt->r2, &ppr->gr[2], sizeof(long) * 2); + + /* gr4-gr7 */ + + for (i = 4; i < 8; i++) { + retval |= __get_user(val, &ppr->gr[i]); + /* NaT bit will be set via PT_NAT_BITS: */ + if (unw_set_gr(&info, i, val, 0) < 0) + return -EIO; + } + + /* gr8-gr11 */ + + retval |= __copy_from_user(&pt->r8, &ppr->gr[8], sizeof(long) * 4); + + /* gr12-gr15 */ + + retval |= __copy_from_user(&pt->r12, &ppr->gr[12], sizeof(long) * 2); + retval |= __copy_from_user(&pt->r14, &ppr->gr[14], sizeof(long)); + retval |= __copy_from_user(&pt->r15, &ppr->gr[15], sizeof(long)); + + /* gr16-gr31 */ + + retval |= __copy_from_user(&pt->r16, &ppr->gr[16], sizeof(long) * 16); + + /* b0 */ + + retval |= __get_user(pt->b0, &ppr->br[0]); + + /* b1-b5 */ + + for (i = 1; i < 6; i++) { + retval |= __get_user(val, &ppr->br[i]); + unw_set_br(&info, i, val); + } + + /* b6-b7 */ + + retval |= __get_user(pt->b6, &ppr->br[6]); + retval |= __get_user(pt->b7, &ppr->br[7]); + + /* fr2-fr5 */ + + for (i = 2; i < 6; i++) { + retval |= __copy_from_user(&fpval, &ppr->fr[i], sizeof(fpval)); + if (unw_set_fr(&info, i, fpval) < 0) + return -EIO; + } + + /* fr6-fr11 */ + + retval |= __copy_from_user(&pt->f6, &ppr->fr[6], + sizeof(ppr->fr[6]) * 6); + + /* fp scratch regs(12-15) */ + + retval |= __copy_from_user(&sw->f12, &ppr->fr[12], + sizeof(ppr->fr[12]) * 4); + + /* fr16-fr31 */ + + for (i = 16; i < 32; i++) { + retval |= __copy_from_user(&fpval, &ppr->fr[i], + sizeof(fpval)); + if (unw_set_fr(&info, i, fpval) < 0) + return -EIO; + } + + /* fph */ + + ia64_sync_fph(child); + retval |= __copy_from_user(&child->thread.fph, &ppr->fr[32], + sizeof(ppr->fr[32]) * 96); + + /* preds */ + + retval |= __get_user(pt->pr, &ppr->pr); + + /* nat bits */ + + retval |= __get_user(nat_bits, &ppr->nat); + + retval |= access_elf_reg(child, &info, ELF_CR_IPSR_OFFSET, &psr, 1); + retval |= access_elf_reg(child, &info, ELF_AR_RSC_OFFSET, &rsc, 1); + retval |= access_elf_reg(child, &info, ELF_AR_EC_OFFSET, &ec, 1); + retval |= access_elf_reg(child, &info, ELF_AR_LC_OFFSET, &lc, 1); + retval |= access_elf_reg(child, &info, ELF_AR_RNAT_OFFSET, &rnat, 1); + retval |= access_elf_reg(child, &info, ELF_AR_BSP_OFFSET, &bsp, 1); + retval |= access_elf_reg(child, &info, ELF_CFM_OFFSET, &cfm, 1); + retval |= access_elf_reg(child, &info, ELF_NAT_OFFSET, &nat_bits, 1); + + return retval ? -EIO : 0; +} + +void +user_enable_single_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->ss = 1; +} + +void +user_enable_block_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + set_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->tb = 1; +} + +void +user_disable_single_step (struct task_struct *child) +{ + struct ia64_psr *child_psr = ia64_psr(task_pt_regs(child)); + + /* make sure the single step/taken-branch trap bits are not set: */ + clear_tsk_thread_flag(child, TIF_SINGLESTEP); + child_psr->ss = 0; + child_psr->tb = 0; +} + +/* + * Called by kernel/ptrace.c when detaching.. + * + * Make sure the single step bit is not set. + */ +void +ptrace_disable (struct task_struct *child) +{ + user_disable_single_step(child); +} + +static int +access_uarea (struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access); + +long +arch_ptrace (struct task_struct *child, long request, + unsigned long addr, unsigned long data) +{ + switch (request) { + case PTRACE_PEEKTEXT: + case PTRACE_PEEKDATA: + /* read word at location addr */ + if (ptrace_access_vm(child, addr, &data, sizeof(data), + FOLL_FORCE) + != sizeof(data)) + return -EIO; + /* ensure return value is not mistaken for error code */ + force_successful_syscall_return(); + return data; + + /* PTRACE_POKETEXT and PTRACE_POKEDATA is handled + * by the generic ptrace_request(). + */ + + case PTRACE_PEEKUSR: + /* read the word at addr in the USER area */ + if (access_uarea(child, addr, &data, 0) < 0) + return -EIO; + /* ensure return value is not mistaken for error code */ + force_successful_syscall_return(); + return data; + + case PTRACE_POKEUSR: + /* write the word at addr in the USER area */ + if (access_uarea(child, addr, &data, 1) < 0) + return -EIO; + return 0; + + case PTRACE_OLD_GETSIGINFO: + /* for backwards-compatibility */ + return ptrace_request(child, PTRACE_GETSIGINFO, addr, data); + + case PTRACE_OLD_SETSIGINFO: + /* for backwards-compatibility */ + return ptrace_request(child, PTRACE_SETSIGINFO, addr, data); + + case PTRACE_GETREGS: + return ptrace_getregs(child, + (struct pt_all_user_regs __user *) data); + + case PTRACE_SETREGS: + return ptrace_setregs(child, + (struct pt_all_user_regs __user *) data); + + default: + return ptrace_request(child, request, addr, data); + } +} + + +/* "asmlinkage" so the input arguments are preserved... */ + +asmlinkage long +syscall_trace_enter (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + if (ptrace_report_syscall_entry(®s)) + return -ENOSYS; + + /* copy user rbs to kernel rbs */ + if (test_thread_flag(TIF_RESTORE_RSE)) + ia64_sync_krbs(); + + + audit_syscall_entry(regs.r15, arg0, arg1, arg2, arg3); + + return 0; +} + +/* "asmlinkage" so the input arguments are preserved... */ + +asmlinkage void +syscall_trace_leave (long arg0, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + int step; + + audit_syscall_exit(®s); + + step = test_thread_flag(TIF_SINGLESTEP); + if (step || test_thread_flag(TIF_SYSCALL_TRACE)) + ptrace_report_syscall_exit(®s, step); + + /* copy user rbs to kernel rbs */ + if (test_thread_flag(TIF_RESTORE_RSE)) + ia64_sync_krbs(); +} + +/* Utrace implementation starts here */ +struct regset_get { + void *kbuf; + void __user *ubuf; +}; + +struct regset_set { + const void *kbuf; + const void __user *ubuf; +}; + +struct regset_getset { + struct task_struct *target; + const struct user_regset *regset; + union { + struct regset_get get; + struct regset_set set; + } u; + unsigned int pos; + unsigned int count; + int ret; +}; + +static const ptrdiff_t pt_offsets[32] = +{ +#define R(n) offsetof(struct pt_regs, r##n) + [0] = -1, R(1), R(2), R(3), + [4] = -1, [5] = -1, [6] = -1, [7] = -1, + R(8), R(9), R(10), R(11), R(12), R(13), R(14), R(15), + R(16), R(17), R(18), R(19), R(20), R(21), R(22), R(23), + R(24), R(25), R(26), R(27), R(28), R(29), R(30), R(31), +#undef R +}; + +static int +access_elf_gpreg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt = task_pt_regs(target); + unsigned reg = addr / sizeof(unsigned long); + ptrdiff_t d = pt_offsets[reg]; + + if (d >= 0) { + unsigned long *ptr = (void *)pt + d; + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; + } else { + char nat = 0; + if (write_access) { + /* read NaT bit first: */ + unsigned long dummy; + int ret = unw_get_gr(info, reg, &dummy, &nat); + if (ret < 0) + return ret; + } + return unw_access_gr(info, reg, data, &nat, write_access); + } +} + +static int +access_elf_breg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + switch (addr) { + case ELF_BR_OFFSET(0): + ptr = &pt->b0; + break; + case ELF_BR_OFFSET(1) ... ELF_BR_OFFSET(5): + return unw_access_br(info, (addr - ELF_BR_OFFSET(0))/8, + data, write_access); + case ELF_BR_OFFSET(6): + ptr = &pt->b6; + break; + case ELF_BR_OFFSET(7): + ptr = &pt->b7; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static int +access_elf_areg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + struct pt_regs *pt; + unsigned long cfm, urbs_end; + unsigned long *ptr = NULL; + + pt = task_pt_regs(target); + if (addr >= ELF_AR_RSC_OFFSET && addr <= ELF_AR_SSD_OFFSET) { + switch (addr) { + case ELF_AR_RSC_OFFSET: + /* force PL3 */ + if (write_access) + pt->ar_rsc = *data | (3 << 2); + else + *data = pt->ar_rsc; + return 0; + case ELF_AR_BSP_OFFSET: + /* + * By convention, we use PT_AR_BSP to refer to + * the end of the user-level backing store. + * Use ia64_rse_skip_regs(PT_AR_BSP, -CFM.sof) + * to get the real value of ar.bsp at the time + * the kernel was entered. + * + * Furthermore, when changing the contents of + * PT_AR_BSP (or PT_CFM) while the task is + * blocked in a system call, convert the state + * so that the non-system-call exit + * path is used. This ensures that the proper + * state will be picked up when resuming + * execution. However, it *also* means that + * once we write PT_AR_BSP/PT_CFM, it won't be + * possible to modify the syscall arguments of + * the pending system call any longer. This + * shouldn't be an issue because modifying + * PT_AR_BSP/PT_CFM generally implies that + * we're either abandoning the pending system + * call or that we defer it's re-execution + * (e.g., due to GDB doing an inferior + * function call). + */ + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (*data != urbs_end) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + /* + * Simulate user-level write + * of ar.bsp: + */ + pt->loadrs = 0; + pt->ar_bspstore = *data; + } + } else + *data = urbs_end; + return 0; + case ELF_AR_BSPSTORE_OFFSET: + ptr = &pt->ar_bspstore; + break; + case ELF_AR_RNAT_OFFSET: + ptr = &pt->ar_rnat; + break; + case ELF_AR_CCV_OFFSET: + ptr = &pt->ar_ccv; + break; + case ELF_AR_UNAT_OFFSET: + ptr = &pt->ar_unat; + break; + case ELF_AR_FPSR_OFFSET: + ptr = &pt->ar_fpsr; + break; + case ELF_AR_PFS_OFFSET: + ptr = &pt->ar_pfs; + break; + case ELF_AR_LC_OFFSET: + return unw_access_ar(info, UNW_AR_LC, data, + write_access); + case ELF_AR_EC_OFFSET: + return unw_access_ar(info, UNW_AR_EC, data, + write_access); + case ELF_AR_CSD_OFFSET: + ptr = &pt->ar_csd; + break; + case ELF_AR_SSD_OFFSET: + ptr = &pt->ar_ssd; + } + } else if (addr >= ELF_CR_IIP_OFFSET && addr <= ELF_CR_IPSR_OFFSET) { + switch (addr) { + case ELF_CR_IIP_OFFSET: + ptr = &pt->cr_iip; + break; + case ELF_CFM_OFFSET: + urbs_end = ia64_get_user_rbs_end(target, pt, &cfm); + if (write_access) { + if (((cfm ^ *data) & PFM_MASK) != 0) { + if (in_syscall(pt)) + convert_to_non_syscall(target, + pt, + cfm); + pt->cr_ifs = ((pt->cr_ifs & ~PFM_MASK) + | (*data & PFM_MASK)); + } + } else + *data = cfm; + return 0; + case ELF_CR_IPSR_OFFSET: + if (write_access) { + unsigned long tmp = *data; + /* psr.ri==3 is a reserved value: SDM 2:25 */ + if ((tmp & IA64_PSR_RI) == IA64_PSR_RI) + tmp &= ~IA64_PSR_RI; + pt->cr_ipsr = ((tmp & IPSR_MASK) + | (pt->cr_ipsr & ~IPSR_MASK)); + } else + *data = (pt->cr_ipsr & IPSR_MASK); + return 0; + } + } else if (addr == ELF_NAT_OFFSET) + return access_nat_bits(target, pt, info, + data, write_access); + else if (addr == ELF_PR_OFFSET) + ptr = &pt->pr; + else + return -1; + + if (write_access) + *ptr = *data; + else + *data = *ptr; + + return 0; +} + +static int +access_elf_reg(struct task_struct *target, struct unw_frame_info *info, + unsigned long addr, unsigned long *data, int write_access) +{ + if (addr >= ELF_GR_OFFSET(1) && addr <= ELF_GR_OFFSET(31)) + return access_elf_gpreg(target, info, addr, data, write_access); + else if (addr >= ELF_BR_OFFSET(0) && addr <= ELF_BR_OFFSET(7)) + return access_elf_breg(target, info, addr, data, write_access); + else + return access_elf_areg(target, info, addr, data, write_access); +} + +struct regset_membuf { + struct membuf to; + int ret; +}; + +static void do_gpregs_get(struct unw_frame_info *info, void *arg) +{ + struct regset_membuf *dst = arg; + struct membuf to = dst->to; + unsigned int n; + elf_greg_t reg; + + if (unw_unwind_to_user(info) < 0) + return; + + /* + * coredump format: + * r0-r31 + * NaT bits (for r0-r31; bit N == 1 iff rN is a NaT) + * predicate registers (p0-p63) + * b0-b7 + * ip cfm user-mask + * ar.rsc ar.bsp ar.bspstore ar.rnat + * ar.ccv ar.unat ar.fpsr ar.pfs ar.lc ar.ec + */ + + + /* Skip r0 */ + membuf_zero(&to, 8); + for (n = 8; to.left && n < ELF_AR_END_OFFSET; n += 8) { + if (access_elf_reg(info->task, info, n, ®, 0) < 0) { + dst->ret = -EIO; + return; + } + membuf_store(&to, reg); + } +} + +static void do_gpregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + + if (unw_unwind_to_user(info) < 0) + return; + + if (!dst->count) + return; + /* Skip r0 */ + if (dst->pos < ELF_GR_OFFSET(1)) { + user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, + 0, ELF_GR_OFFSET(1)); + dst->ret = 0; + } + + while (dst->count && dst->pos < ELF_AR_END_OFFSET) { + unsigned int n, from, to; + elf_greg_t tmp[16]; + + from = dst->pos; + to = from + sizeof(tmp); + if (to > ELF_AR_END_OFFSET) + to = ELF_AR_END_OFFSET; + /* get up to 16 values */ + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + from, to); + if (dst->ret) + return; + /* now copy them into registers */ + for (n = 0; from < dst->pos; from += sizeof(elf_greg_t), n++) + if (access_elf_reg(dst->target, info, from, + &tmp[n], 1) < 0) { + dst->ret = -EIO; + return; + } + } +} + +#define ELF_FP_OFFSET(i) (i * sizeof(elf_fpreg_t)) + +static void do_fpregs_get(struct unw_frame_info *info, void *arg) +{ + struct task_struct *task = info->task; + struct regset_membuf *dst = arg; + struct membuf to = dst->to; + elf_fpreg_t reg; + unsigned int n; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + membuf_zero(&to, 2 * sizeof(elf_fpreg_t)); + + /* fr2-fr31 */ + for (n = 2; to.left && n < 32; n++) { + if (unw_get_fr(info, n, ®)) { + dst->ret = -EIO; + return; + } + membuf_write(&to, ®, sizeof(reg)); + } + + /* fph */ + if (!to.left) + return; + + ia64_flush_fph(task); + if (task->thread.flags & IA64_THREAD_FPH_VALID) + membuf_write(&to, &task->thread.fph, 96 * sizeof(reg)); + else + membuf_zero(&to, 96 * sizeof(reg)); +} + +static void do_fpregs_set(struct unw_frame_info *info, void *arg) +{ + struct regset_getset *dst = arg; + elf_fpreg_t fpreg, tmp[30]; + int index, start, end; + + if (unw_unwind_to_user(info) < 0) + return; + + /* Skip pos 0 and 1 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(2)) { + user_regset_copyin_ignore(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, + 0, ELF_FP_OFFSET(2)); + dst->ret = 0; + if (dst->count == 0) + return; + } + + /* fr2-fr31 */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(32)) { + start = dst->pos; + end = min(((unsigned int)ELF_FP_OFFSET(32)), + dst->pos + dst->count); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, &dst->u.set.ubuf, tmp, + ELF_FP_OFFSET(2), ELF_FP_OFFSET(32)); + if (dst->ret) + return; + + if (start & 0xF) { /* only write high part */ + if (unw_get_fr(info, start / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[start / sizeof(elf_fpreg_t) - 2].u.bits[0] + = fpreg.u.bits[0]; + start &= ~0xFUL; + } + if (end & 0xF) { /* only write low part */ + if (unw_get_fr(info, end / sizeof(elf_fpreg_t), + &fpreg)) { + dst->ret = -EIO; + return; + } + tmp[end / sizeof(elf_fpreg_t) - 2].u.bits[1] + = fpreg.u.bits[1]; + end = (end + 0xF) & ~0xFUL; + } + + for ( ; start < end ; start += sizeof(elf_fpreg_t)) { + index = start / sizeof(elf_fpreg_t); + if (unw_set_fr(info, index, tmp[index - 2])) { + dst->ret = -EIO; + return; + } + } + if (dst->ret || dst->count == 0) + return; + } + + /* fph */ + if (dst->count > 0 && dst->pos < ELF_FP_OFFSET(128)) { + ia64_sync_fph(dst->target); + dst->ret = user_regset_copyin(&dst->pos, &dst->count, + &dst->u.set.kbuf, + &dst->u.set.ubuf, + &dst->target->thread.fph, + ELF_FP_OFFSET(32), -1); + } +} + +static void +unwind_and_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, void *data) +{ + if (target == current) + unw_init_running(call, data); + else { + struct unw_frame_info info; + memset(&info, 0, sizeof(info)); + unw_init_from_blocked_task(&info, target); + (*call)(&info, data); + } +} + +static int +do_regset_call(void (*call)(struct unw_frame_info *, void *), + struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct regset_getset info = { .target = target, .regset = regset, + .pos = pos, .count = count, + .u.set = { .kbuf = kbuf, .ubuf = ubuf }, + .ret = 0 }; + unwind_and_call(call, target, &info); + return info.ret; +} + +static int +gpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct regset_membuf info = {.to = to}; + unwind_and_call(do_gpregs_get, target, &info); + return info.ret; +} + +static int gpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_gpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static void do_gpregs_writeback(struct unw_frame_info *info, void *arg) +{ + do_sync_rbs(info, ia64_sync_user_rbs); +} + +/* + * This is called to write back the register backing store. + * ptrace does this before it stops, so that a tracer reading the user + * memory after the thread stops will get the current register data. + */ +static int +gpregs_writeback(struct task_struct *target, + const struct user_regset *regset, + int now) +{ + if (test_and_set_tsk_thread_flag(target, TIF_RESTORE_RSE)) + return 0; + set_notify_resume(target); + return do_regset_call(do_gpregs_writeback, target, regset, 0, 0, + NULL, NULL); +} + +static int +fpregs_active(struct task_struct *target, const struct user_regset *regset) +{ + return (target->thread.flags & IA64_THREAD_FPH_VALID) ? 128 : 32; +} + +static int fpregs_get(struct task_struct *target, + const struct user_regset *regset, + struct membuf to) +{ + struct regset_membuf info = {.to = to}; + unwind_and_call(do_fpregs_get, target, &info); + return info.ret; +} + +static int fpregs_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + return do_regset_call(do_fpregs_set, target, regset, pos, count, + kbuf, ubuf); +} + +static int +access_uarea(struct task_struct *child, unsigned long addr, + unsigned long *data, int write_access) +{ + unsigned int pos = -1; /* an invalid value */ + unsigned long *ptr, regnum; + + if ((addr & 0x7) != 0) { + dprintk("ptrace: unaligned register address 0x%lx\n", addr); + return -1; + } + if ((addr >= PT_NAT_BITS + 8 && addr < PT_F2) || + (addr >= PT_R7 + 8 && addr < PT_B1) || + (addr >= PT_AR_LC + 8 && addr < PT_CR_IPSR) || + (addr >= PT_AR_SSD + 8 && addr < PT_DBR)) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + + switch (addr) { + case PT_F32 ... (PT_F127 + 15): + pos = addr - PT_F32 + ELF_FP_OFFSET(32); + break; + case PT_F2 ... (PT_F5 + 15): + pos = addr - PT_F2 + ELF_FP_OFFSET(2); + break; + case PT_F10 ... (PT_F31 + 15): + pos = addr - PT_F10 + ELF_FP_OFFSET(10); + break; + case PT_F6 ... (PT_F9 + 15): + pos = addr - PT_F6 + ELF_FP_OFFSET(6); + break; + } + + if (pos != -1) { + unsigned reg = pos / sizeof(elf_fpreg_t); + int which_half = (pos / sizeof(unsigned long)) & 1; + + if (reg < 32) { /* fr2-fr31 */ + struct unw_frame_info info; + elf_fpreg_t fpreg; + + memset(&info, 0, sizeof(info)); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) + return 0; + + if (unw_get_fr(&info, reg, &fpreg)) + return -1; + if (write_access) { + fpreg.u.bits[which_half] = *data; + if (unw_set_fr(&info, reg, fpreg)) + return -1; + } else { + *data = fpreg.u.bits[which_half]; + } + } else { /* fph */ + elf_fpreg_t *p = &child->thread.fph[reg - 32]; + unsigned long *bits = &p->u.bits[which_half]; + + ia64_sync_fph(child); + if (write_access) + *bits = *data; + else if (child->thread.flags & IA64_THREAD_FPH_VALID) + *data = *bits; + else + *data = 0; + } + return 0; + } + + switch (addr) { + case PT_NAT_BITS: + pos = ELF_NAT_OFFSET; + break; + case PT_R4 ... PT_R7: + pos = addr - PT_R4 + ELF_GR_OFFSET(4); + break; + case PT_B1 ... PT_B5: + pos = addr - PT_B1 + ELF_BR_OFFSET(1); + break; + case PT_AR_EC: + pos = ELF_AR_EC_OFFSET; + break; + case PT_AR_LC: + pos = ELF_AR_LC_OFFSET; + break; + case PT_CR_IPSR: + pos = ELF_CR_IPSR_OFFSET; + break; + case PT_CR_IIP: + pos = ELF_CR_IIP_OFFSET; + break; + case PT_CFM: + pos = ELF_CFM_OFFSET; + break; + case PT_AR_UNAT: + pos = ELF_AR_UNAT_OFFSET; + break; + case PT_AR_PFS: + pos = ELF_AR_PFS_OFFSET; + break; + case PT_AR_RSC: + pos = ELF_AR_RSC_OFFSET; + break; + case PT_AR_RNAT: + pos = ELF_AR_RNAT_OFFSET; + break; + case PT_AR_BSPSTORE: + pos = ELF_AR_BSPSTORE_OFFSET; + break; + case PT_PR: + pos = ELF_PR_OFFSET; + break; + case PT_B6: + pos = ELF_BR_OFFSET(6); + break; + case PT_AR_BSP: + pos = ELF_AR_BSP_OFFSET; + break; + case PT_R1 ... PT_R3: + pos = addr - PT_R1 + ELF_GR_OFFSET(1); + break; + case PT_R12 ... PT_R15: + pos = addr - PT_R12 + ELF_GR_OFFSET(12); + break; + case PT_R8 ... PT_R11: + pos = addr - PT_R8 + ELF_GR_OFFSET(8); + break; + case PT_R16 ... PT_R31: + pos = addr - PT_R16 + ELF_GR_OFFSET(16); + break; + case PT_AR_CCV: + pos = ELF_AR_CCV_OFFSET; + break; + case PT_AR_FPSR: + pos = ELF_AR_FPSR_OFFSET; + break; + case PT_B0: + pos = ELF_BR_OFFSET(0); + break; + case PT_B7: + pos = ELF_BR_OFFSET(7); + break; + case PT_AR_CSD: + pos = ELF_AR_CSD_OFFSET; + break; + case PT_AR_SSD: + pos = ELF_AR_SSD_OFFSET; + break; + } + + if (pos != -1) { + struct unw_frame_info info; + + memset(&info, 0, sizeof(info)); + unw_init_from_blocked_task(&info, child); + if (unw_unwind_to_user(&info) < 0) + return 0; + + return access_elf_reg(child, &info, pos, data, write_access); + } + + /* access debug registers */ + if (addr >= PT_IBR) { + regnum = (addr - PT_IBR) >> 3; + ptr = &child->thread.ibr[0]; + } else { + regnum = (addr - PT_DBR) >> 3; + ptr = &child->thread.dbr[0]; + } + + if (regnum >= 8) { + dprintk("ptrace: rejecting access to register " + "address 0x%lx\n", addr); + return -1; + } + + if (!(child->thread.flags & IA64_THREAD_DBG_VALID)) { + child->thread.flags |= IA64_THREAD_DBG_VALID; + memset(child->thread.dbr, 0, + sizeof(child->thread.dbr)); + memset(child->thread.ibr, 0, + sizeof(child->thread.ibr)); + } + + ptr += regnum; + + if ((regnum & 1) && write_access) { + /* don't let the user set kernel-level breakpoints: */ + *ptr = *data & ~(7UL << 56); + return 0; + } + if (write_access) + *ptr = *data; + else + *data = *ptr; + return 0; +} + +static const struct user_regset native_regsets[] = { + { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(elf_greg_t), .align = sizeof(elf_greg_t), + .regset_get = gpregs_get, .set = gpregs_set, + .writeback = gpregs_writeback + }, + { + .core_note_type = NT_PRFPREG, + .n = ELF_NFPREG, + .size = sizeof(elf_fpreg_t), .align = sizeof(elf_fpreg_t), + .regset_get = fpregs_get, .set = fpregs_set, .active = fpregs_active + }, +}; + +static const struct user_regset_view user_ia64_view = { + .name = "ia64", + .e_machine = EM_IA_64, + .regsets = native_regsets, .n = ARRAY_SIZE(native_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *tsk) +{ + return &user_ia64_view; +} + +struct syscall_get_args { + unsigned int i; + unsigned int n; + unsigned long *args; + struct pt_regs *regs; +}; + +static void syscall_get_args_cb(struct unw_frame_info *info, void *data) +{ + struct syscall_get_args *args = data; + struct pt_regs *pt = args->regs; + unsigned long *krbs, cfm, ndirty, nlocals, nouts; + int i, count; + + if (unw_unwind_to_user(info) < 0) + return; + + /* + * We get here via a few paths: + * - break instruction: cfm is shared with caller. + * syscall args are in out= regs, locals are non-empty. + * - epsinstruction: cfm is set by br.call + * locals don't exist. + * + * For both cases arguments are reachable in cfm.sof - cfm.sol. + * CFM: [ ... | sor: 17..14 | sol : 13..7 | sof : 6..0 ] + */ + cfm = pt->cr_ifs; + nlocals = (cfm >> 7) & 0x7f; /* aka sol */ + nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */ + krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8; + ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19)); + + count = 0; + if (in_syscall(pt)) + count = min_t(int, args->n, nouts); + + /* Iterate over outs. */ + for (i = 0; i < count; i++) { + int j = ndirty + nlocals + i + args->i; + args->args[i] = *ia64_rse_skip_regs(krbs, j); + } + + while (i < args->n) { + args->args[i] = 0; + i++; + } +} + +void syscall_get_arguments(struct task_struct *task, + struct pt_regs *regs, unsigned long *args) +{ + struct syscall_get_args data = { + .i = 0, + .n = 6, + .args = args, + .regs = regs, + }; + + if (task == current) + unw_init_running(syscall_get_args_cb, &data); + else { + struct unw_frame_info ufi; + memset(&ufi, 0, sizeof(ufi)); + unw_init_from_blocked_task(&ufi, task); + syscall_get_args_cb(&ufi, &data); + } +} diff --git a/arch/ia64/kernel/relocate_kernel.S b/arch/ia64/kernel/relocate_kernel.S new file mode 100644 index 000000000000..527a7b896a6e --- /dev/null +++ b/arch/ia64/kernel/relocate_kernel.S @@ -0,0 +1,321 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * arch/ia64/kernel/relocate_kernel.S + * + * Relocate kexec'able kernel and start it + * + * Copyright (C) 2005 Hewlett-Packard Development Company, L.P. + * Copyright (C) 2005 Khalid Aziz + * Copyright (C) 2005 Intel Corp, Zou Nan hai + */ +#include +#include +#include +#include +#include + + /* Must be relocatable PIC code callable as a C function + */ +GLOBAL_ENTRY(relocate_new_kernel) + .prologue + alloc r31=ar.pfs,4,0,0,0 + .body +.reloc_entry: +{ + rsm psr.i| psr.ic + mov r2=ip +} + ;; +{ + flushrs // must be first insn in group + srlz.i +} + ;; + dep r2=0,r2,61,3 //to physical address + ;; + //first switch to physical mode + add r3=1f-.reloc_entry, r2 + movl r16 = IA64_PSR_AC|IA64_PSR_BN|IA64_PSR_IC + mov ar.rsc=0 // put RSE in enforced lazy mode + ;; + add sp=(memory_stack_end - 16 - .reloc_entry),r2 + add r8=(register_stack - .reloc_entry),r2 + ;; + mov r18=ar.rnat + mov ar.bspstore=r8 + ;; + mov cr.ipsr=r16 + mov cr.iip=r3 + mov cr.ifs=r0 + srlz.i + ;; + mov ar.rnat=r18 + rfi // note: this unmask MCA/INIT (psr.mc) + ;; +1: + //physical mode code begin + mov b6=in1 + dep r28=0,in2,61,3 //to physical address + + // purge all TC entries +#define O(member) IA64_CPUINFO_##member##_OFFSET + GET_THIS_PADDR(r2, ia64_cpu_info) // load phys addr of cpu_info into r2 + ;; + addl r17=O(PTCE_STRIDE),r2 + addl r2=O(PTCE_BASE),r2 + ;; + ld8 r18=[r2],(O(PTCE_COUNT)-O(PTCE_BASE));; // r18=ptce_base + ld4 r19=[r2],4 // r19=ptce_count[0] + ld4 r21=[r17],4 // r21=ptce_stride[0] + ;; + ld4 r20=[r2] // r20=ptce_count[1] + ld4 r22=[r17] // r22=ptce_stride[1] + mov r24=r0 + ;; + adds r20=-1,r20 + ;; +#undef O +2: + cmp.ltu p6,p7=r24,r19 +(p7) br.cond.dpnt.few 4f + mov ar.lc=r20 +3: + ptc.e r18 + ;; + add r18=r22,r18 + br.cloop.sptk.few 3b + ;; + add r18=r21,r18 + add r24=1,r24 + ;; + br.sptk.few 2b +4: + srlz.i + ;; + // purge TR entry for kernel text and data + movl r16=KERNEL_START + mov r18=KERNEL_TR_PAGE_SHIFT<<2 + ;; + ptr.i r16, r18 + ptr.d r16, r18 + ;; + srlz.i + ;; + + // purge TR entry for pal code + mov r16=in3 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.i r16,r18 + ;; + srlz.i + ;; + + // purge TR entry for stack + mov r16=IA64_KR(CURRENT_STACK) + ;; + shl r16=r16,IA64_GRANULE_SHIFT + movl r19=PAGE_OFFSET + ;; + add r16=r19,r16 + mov r18=IA64_GRANULE_SHIFT<<2 + ;; + ptr.d r16,r18 + ;; + srlz.i + ;; + + //copy segments + movl r16=PAGE_MASK + mov r30=in0 // in0 is page_list + br.sptk.few .dest_page + ;; +.loop: + ld8 r30=[in0], 8;; +.dest_page: + tbit.z p0, p6=r30, 0;; // 0x1 dest page +(p6) and r17=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 1;; // 0x2 indirect page +(p6) and in0=r30, r16 +(p6) br.cond.sptk.few .loop;; + + tbit.z p0, p6=r30, 2;; // 0x4 end flag +(p6) br.cond.sptk.few .end_loop;; + + tbit.z p6, p0=r30, 3;; // 0x8 source page +(p6) br.cond.sptk.few .loop + + and r18=r30, r16 + + // simple copy page, may optimize later + movl r14=PAGE_SIZE/8 - 1;; + mov ar.lc=r14;; +1: + ld8 r14=[r18], 8;; + st8 [r17]=r14;; + fc.i r17 + add r17=8, r17 + br.ctop.sptk.few 1b + br.sptk.few .loop + ;; + +.end_loop: + sync.i // for fc.i + ;; + srlz.i + ;; + srlz.d + ;; + br.call.sptk.many b0=b6;; + +.align 32 +memory_stack: + .fill 8192, 1, 0 +memory_stack_end: +register_stack: + .fill 8192, 1, 0 +register_stack_end: +relocate_new_kernel_end: +END(relocate_new_kernel) + +.global relocate_new_kernel_size +relocate_new_kernel_size: + data8 relocate_new_kernel_end - relocate_new_kernel + +GLOBAL_ENTRY(ia64_dump_cpu_regs) + .prologue + alloc loc0=ar.pfs,1,2,0,0 + .body + mov ar.rsc=0 // put RSE in enforced lazy mode + add loc1=4*8, in0 // save r4 and r5 first + ;; +{ + flushrs // flush dirty regs to backing store + srlz.i +} + st8 [loc1]=r4, 8 + ;; + st8 [loc1]=r5, 8 + ;; + add loc1=32*8, in0 + mov r4=ar.rnat + ;; + st8 [in0]=r0, 8 // r0 + st8 [loc1]=r4, 8 // rnat + mov r5=pr + ;; + st8 [in0]=r1, 8 // r1 + st8 [loc1]=r5, 8 // pr + mov r4=b0 + ;; + st8 [in0]=r2, 8 // r2 + st8 [loc1]=r4, 8 // b0 + mov r5=b1; + ;; + st8 [in0]=r3, 24 // r3 + st8 [loc1]=r5, 8 // b1 + mov r4=b2 + ;; + st8 [in0]=r6, 8 // r6 + st8 [loc1]=r4, 8 // b2 + mov r5=b3 + ;; + st8 [in0]=r7, 8 // r7 + st8 [loc1]=r5, 8 // b3 + mov r4=b4 + ;; + st8 [in0]=r8, 8 // r8 + st8 [loc1]=r4, 8 // b4 + mov r5=b5 + ;; + st8 [in0]=r9, 8 // r9 + st8 [loc1]=r5, 8 // b5 + mov r4=b6 + ;; + st8 [in0]=r10, 8 // r10 + st8 [loc1]=r5, 8 // b6 + mov r5=b7 + ;; + st8 [in0]=r11, 8 // r11 + st8 [loc1]=r5, 8 // b7 + mov r4=b0 + ;; + st8 [in0]=r12, 8 // r12 + st8 [loc1]=r4, 8 // ip + mov r5=loc0 + ;; + st8 [in0]=r13, 8 // r13 + extr.u r5=r5, 0, 38 // ar.pfs.pfm + mov r4=r0 // user mask + ;; + st8 [in0]=r14, 8 // r14 + st8 [loc1]=r5, 8 // cfm + ;; + st8 [in0]=r15, 8 // r15 + st8 [loc1]=r4, 8 // user mask + mov r5=ar.rsc + ;; + st8 [in0]=r16, 8 // r16 + st8 [loc1]=r5, 8 // ar.rsc + mov r4=ar.bsp + ;; + st8 [in0]=r17, 8 // r17 + st8 [loc1]=r4, 8 // ar.bsp + mov r5=ar.bspstore + ;; + st8 [in0]=r18, 8 // r18 + st8 [loc1]=r5, 8 // ar.bspstore + mov r4=ar.rnat + ;; + st8 [in0]=r19, 8 // r19 + st8 [loc1]=r4, 8 // ar.rnat + mov r5=ar.ccv + ;; + st8 [in0]=r20, 8 // r20 + st8 [loc1]=r5, 8 // ar.ccv + mov r4=ar.unat + ;; + st8 [in0]=r21, 8 // r21 + st8 [loc1]=r4, 8 // ar.unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r22, 8 // r22 + st8 [loc1]=r5, 8 // ar.fpsr + mov r4 = ar.unat + ;; + st8 [in0]=r23, 8 // r23 + st8 [loc1]=r4, 8 // unat + mov r5 = ar.fpsr + ;; + st8 [in0]=r24, 8 // r24 + st8 [loc1]=r5, 8 // fpsr + mov r4 = ar.pfs + ;; + st8 [in0]=r25, 8 // r25 + st8 [loc1]=r4, 8 // ar.pfs + mov r5 = ar.lc + ;; + st8 [in0]=r26, 8 // r26 + st8 [loc1]=r5, 8 // ar.lc + mov r4 = ar.ec + ;; + st8 [in0]=r27, 8 // r27 + st8 [loc1]=r4, 8 // ar.ec + mov r5 = ar.csd + ;; + st8 [in0]=r28, 8 // r28 + st8 [loc1]=r5, 8 // ar.csd + mov r4 = ar.ssd + ;; + st8 [in0]=r29, 8 // r29 + st8 [loc1]=r4, 8 // ar.ssd + ;; + st8 [in0]=r30, 8 // r30 + ;; + st8 [in0]=r31, 8 // r31 + mov ar.pfs=loc0 + ;; + br.ret.sptk.many rp +END(ia64_dump_cpu_regs) diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c new file mode 100644 index 000000000000..e4f0705c0282 --- /dev/null +++ b/arch/ia64/kernel/sal.c @@ -0,0 +1,400 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * System Abstraction Layer (SAL) interface routines. + * + * Copyright (C) 1998, 1999, 2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + __cacheline_aligned DEFINE_SPINLOCK(sal_lock); +unsigned long sal_platform_features; + +unsigned short sal_revision; +unsigned short sal_version; + +#define SAL_MAJOR(x) ((x) >> 8) +#define SAL_MINOR(x) ((x) & 0xff) + +static struct { + void *addr; /* function entry point */ + void *gpval; /* gp value to use */ +} pdesc; + +static long +default_handler (void) +{ + return -1; +} + +ia64_sal_handler ia64_sal = (ia64_sal_handler) default_handler; +ia64_sal_desc_ptc_t *ia64_ptc_domain_info; + +const char * +ia64_sal_strerror (long status) +{ + const char *str; + switch (status) { + case 0: str = "Call completed without error"; break; + case 1: str = "Effect a warm boot of the system to complete " + "the update"; break; + case -1: str = "Not implemented"; break; + case -2: str = "Invalid argument"; break; + case -3: str = "Call completed with error"; break; + case -4: str = "Virtual address not registered"; break; + case -5: str = "No information available"; break; + case -6: str = "Insufficient space to add the entry"; break; + case -7: str = "Invalid entry_addr value"; break; + case -8: str = "Invalid interrupt vector"; break; + case -9: str = "Requested memory not available"; break; + case -10: str = "Unable to write to the NVM device"; break; + case -11: str = "Invalid partition type specified"; break; + case -12: str = "Invalid NVM_Object id specified"; break; + case -13: str = "NVM_Object already has the maximum number " + "of partitions"; break; + case -14: str = "Insufficient space in partition for the " + "requested write sub-function"; break; + case -15: str = "Insufficient data buffer space for the " + "requested read record sub-function"; break; + case -16: str = "Scratch buffer required for the write/delete " + "sub-function"; break; + case -17: str = "Insufficient space in the NVM_Object for the " + "requested create sub-function"; break; + case -18: str = "Invalid value specified in the partition_rec " + "argument"; break; + case -19: str = "Record oriented I/O not supported for this " + "partition"; break; + case -20: str = "Bad format of record to be written or " + "required keyword variable not " + "specified"; break; + default: str = "Unknown SAL status code"; break; + } + return str; +} + +void __init +ia64_sal_handler_init (void *entry_point, void *gpval) +{ + /* fill in the SAL procedure descriptor and point ia64_sal to it: */ + pdesc.addr = entry_point; + pdesc.gpval = gpval; + ia64_sal = (ia64_sal_handler) &pdesc; +} + +static void __init +check_versions (struct ia64_sal_systab *systab) +{ + sal_revision = (systab->sal_rev_major << 8) | systab->sal_rev_minor; + sal_version = (systab->sal_b_rev_major << 8) | systab->sal_b_rev_minor; + + /* Check for broken firmware */ + if ((sal_revision == SAL_VERSION_CODE(49, 29)) + && (sal_version == SAL_VERSION_CODE(49, 29))) + { + /* + * Old firmware for zx2000 prototypes have this weird version number, + * reset it to something sane. + */ + sal_revision = SAL_VERSION_CODE(2, 8); + sal_version = SAL_VERSION_CODE(0, 0); + } +} + +static void __init +sal_desc_entry_point (void *p) +{ + struct ia64_sal_desc_entry_point *ep = p; + ia64_pal_handler_init(__va(ep->pal_proc)); + ia64_sal_handler_init(__va(ep->sal_proc), __va(ep->gp)); +} + +#ifdef CONFIG_SMP +static void __init +set_smp_redirect (int flag) +{ +#ifndef CONFIG_HOTPLUG_CPU + if (no_int_routing) + smp_int_redirect &= ~flag; + else + smp_int_redirect |= flag; +#else + /* + * For CPU Hotplug we dont want to do any chipset supported + * interrupt redirection. The reason is this would require that + * All interrupts be stopped and hard bind the irq to a cpu. + * Later when the interrupt is fired we need to set the redir hint + * on again in the vector. This is cumbersome for something that the + * user mode irq balancer will solve anyways. + */ + no_int_routing=1; + smp_int_redirect &= ~flag; +#endif +} +#else +#define set_smp_redirect(flag) do { } while (0) +#endif + +static void __init +sal_desc_platform_feature (void *p) +{ + struct ia64_sal_desc_platform_feature *pf = p; + sal_platform_features = pf->feature_mask; + + printk(KERN_INFO "SAL Platform features:"); + if (!sal_platform_features) { + printk(" None\n"); + return; + } + + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_BUS_LOCK) + printk(" BusLock"); + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT) { + printk(" IRQ_Redirection"); + set_smp_redirect(SMP_IRQ_REDIRECTION); + } + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT) { + printk(" IPI_Redirection"); + set_smp_redirect(SMP_IPI_REDIRECTION); + } + if (sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT) + printk(" ITC_Drift"); + printk("\n"); +} + +#ifdef CONFIG_SMP +static void __init +sal_desc_ap_wakeup (void *p) +{ + struct ia64_sal_desc_ap_wakeup *ap = p; + + switch (ap->mechanism) { + case IA64_SAL_AP_EXTERNAL_INT: + ap_wakeup_vector = ap->vector; + printk(KERN_INFO "SAL: AP wakeup using external interrupt " + "vector 0x%lx\n", ap_wakeup_vector); + break; + default: + printk(KERN_ERR "SAL: AP wakeup mechanism unsupported!\n"); + break; + } +} + +static void __init +chk_nointroute_opt(void) +{ + char *cp; + + for (cp = boot_command_line; *cp; ) { + if (memcmp(cp, "nointroute", 10) == 0) { + no_int_routing = 1; + printk ("no_int_routing on\n"); + break; + } else { + while (*cp != ' ' && *cp) + ++cp; + while (*cp == ' ') + ++cp; + } + } +} + +#else +static void __init sal_desc_ap_wakeup(void *p) { } +#endif + +/* + * HP rx5670 firmware polls for interrupts during SAL_CACHE_FLUSH by reading + * cr.ivr, but it never writes cr.eoi. This leaves any interrupt marked as + * "in-service" and masks other interrupts of equal or lower priority. + * + * HP internal defect reports: F1859, F2775, F3031. + */ +static int sal_cache_flush_drops_interrupts; + +static int __init +force_pal_cache_flush(char *str) +{ + sal_cache_flush_drops_interrupts = 1; + return 0; +} +early_param("force_pal_cache_flush", force_pal_cache_flush); + +void __init +check_sal_cache_flush (void) +{ + unsigned long flags; + int cpu; + u64 vector, cache_type = 3; + struct ia64_sal_retval isrv; + + if (sal_cache_flush_drops_interrupts) + return; + + cpu = get_cpu(); + local_irq_save(flags); + + /* + * Send ourselves a timer interrupt, wait until it's reported, and see + * if SAL_CACHE_FLUSH drops it. + */ + ia64_send_ipi(cpu, IA64_TIMER_VECTOR, IA64_IPI_DM_INT, 0); + + while (!ia64_get_irr(IA64_TIMER_VECTOR)) + cpu_relax(); + + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + + if (isrv.status) + printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status); + + if (ia64_get_irr(IA64_TIMER_VECTOR)) { + vector = ia64_get_ivr(); + ia64_eoi(); + WARN_ON(vector != IA64_TIMER_VECTOR); + } else { + sal_cache_flush_drops_interrupts = 1; + printk(KERN_ERR "SAL: SAL_CACHE_FLUSH drops interrupts; " + "PAL_CACHE_FLUSH will be used instead\n"); + ia64_eoi(); + } + + local_irq_restore(flags); + put_cpu(); +} + +s64 +ia64_sal_cache_flush (u64 cache_type) +{ + struct ia64_sal_retval isrv; + + if (sal_cache_flush_drops_interrupts) { + unsigned long flags; + u64 progress; + s64 rc; + + progress = 0; + local_irq_save(flags); + rc = ia64_pal_cache_flush(cache_type, + PAL_CACHE_FLUSH_INVALIDATE, &progress, NULL); + local_irq_restore(flags); + return rc; + } + + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + return isrv.status; +} +EXPORT_SYMBOL_GPL(ia64_sal_cache_flush); + +void __init +ia64_sal_init (struct ia64_sal_systab *systab) +{ + char *p; + int i; + + if (!systab) { + printk(KERN_WARNING "Hmm, no SAL System Table.\n"); + return; + } + + if (strncmp(systab->signature, "SST_", 4) != 0) + printk(KERN_ERR "bad signature in system table!"); + + check_versions(systab); +#ifdef CONFIG_SMP + chk_nointroute_opt(); +#endif + + /* revisions are coded in BCD, so %x does the job for us */ + printk(KERN_INFO "SAL %x.%x: %.32s %.32s%sversion %x.%x\n", + SAL_MAJOR(sal_revision), SAL_MINOR(sal_revision), + systab->oem_id, systab->product_id, + systab->product_id[0] ? " " : "", + SAL_MAJOR(sal_version), SAL_MINOR(sal_version)); + + p = (char *) (systab + 1); + for (i = 0; i < systab->entry_count; i++) { + /* + * The first byte of each entry type contains the type + * descriptor. + */ + switch (*p) { + case SAL_DESC_ENTRY_POINT: + sal_desc_entry_point(p); + break; + case SAL_DESC_PLATFORM_FEATURE: + sal_desc_platform_feature(p); + break; + case SAL_DESC_PTC: + ia64_ptc_domain_info = (ia64_sal_desc_ptc_t *)p; + break; + case SAL_DESC_AP_WAKEUP: + sal_desc_ap_wakeup(p); + break; + } + p += SAL_DESC_SIZE(*p); + } + +} + +int +ia64_sal_oemcall(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall); + +int +ia64_sal_oemcall_nolock(struct ia64_sal_retval *isrvp, u64 oemfunc, u64 arg1, + u64 arg2, u64 arg3, u64 arg4, u64 arg5, u64 arg6, + u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL_NOLOCK(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, + arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall_nolock); + +int +ia64_sal_oemcall_reentrant(struct ia64_sal_retval *isrvp, u64 oemfunc, + u64 arg1, u64 arg2, u64 arg3, u64 arg4, u64 arg5, + u64 arg6, u64 arg7) +{ + if (oemfunc < IA64_SAL_OEMFUNC_MIN || oemfunc > IA64_SAL_OEMFUNC_MAX) + return -1; + SAL_CALL_REENTRANT(*isrvp, oemfunc, arg1, arg2, arg3, arg4, arg5, arg6, + arg7); + return 0; +} +EXPORT_SYMBOL(ia64_sal_oemcall_reentrant); + +long +ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, + unsigned long *drift_info) +{ + struct ia64_sal_retval isrv; + + SAL_CALL(isrv, SAL_FREQ_BASE, which, 0, 0, 0, 0, 0, 0); + *ticks_per_second = isrv.v0; + *drift_info = isrv.v1; + return isrv.status; +} +EXPORT_SYMBOL_GPL(ia64_sal_freq_base); diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c new file mode 100644 index 000000000000..03b632c56899 --- /dev/null +++ b/arch/ia64/kernel/salinfo.c @@ -0,0 +1,646 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * salinfo.c + * + * Creates entries in /proc/sal for various system features. + * + * Copyright (c) 2003, 2006 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2003 Hewlett-Packard Co + * Bjorn Helgaas + * + * 10/30/2001 jbarnes@sgi.com copied much of Stephane's palinfo + * code to create this file + * Oct 23 2003 kaos@sgi.com + * Replace IPI with set_cpus_allowed() to read a record from the required cpu. + * Redesign salinfo log processing to separate interrupt and user space + * contexts. + * Cache the record across multi-block reads from user space. + * Support > 64 cpus. + * Delete module_exit and MOD_INC/DEC_COUNT, salinfo cannot be a module. + * + * Jan 28 2004 kaos@sgi.com + * Periodically check for outstanding MCA or INIT records. + * + * Dec 5 2004 kaos@sgi.com + * Standardize which records are cleared automatically. + * + * Aug 18 2005 kaos@sgi.com + * mca.c may not pass a buffer, a NULL buffer just indicates that a new + * record is available in SAL. + * Replace some NR_CPUS by cpus_online, for hotplug cpu. + * + * Jan 5 2006 kaos@sgi.com + * Handle hotplug cpus coming online. + * Handle hotplug cpus going offline while they still have outstanding records. + * Use the cpu_* macros consistently. + * Replace the counting semaphore with a mutex and a test if the cpumask is non-empty. + * Modify the locking to make the test for "work to do" an atomic operation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Jesse Barnes "); +MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); +MODULE_LICENSE("GPL"); + +typedef struct { + const char *name; /* name of the proc entry */ + unsigned long feature; /* feature bit */ + struct proc_dir_entry *entry; /* registered entry (removal) */ +} salinfo_entry_t; + +/* + * List {name,feature} pairs for every entry in /proc/sal/ + * that this module exports + */ +static const salinfo_entry_t salinfo_entries[]={ + { "bus_lock", IA64_SAL_PLATFORM_FEATURE_BUS_LOCK, }, + { "irq_redirection", IA64_SAL_PLATFORM_FEATURE_IRQ_REDIR_HINT, }, + { "ipi_redirection", IA64_SAL_PLATFORM_FEATURE_IPI_REDIR_HINT, }, + { "itc_drift", IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT, }, +}; + +#define NR_SALINFO_ENTRIES ARRAY_SIZE(salinfo_entries) + +static char *salinfo_log_name[] = { + "mca", + "init", + "cmc", + "cpe", +}; + +static struct proc_dir_entry *salinfo_proc_entries[ + ARRAY_SIZE(salinfo_entries) + /* /proc/sal/bus_lock */ + ARRAY_SIZE(salinfo_log_name) + /* /proc/sal/{mca,...} */ + (2 * ARRAY_SIZE(salinfo_log_name)) + /* /proc/sal/mca/{event,data} */ + 1]; /* /proc/sal */ + +/* Some records we get ourselves, some are accessed as saved data in buffers + * that are owned by mca.c. + */ +struct salinfo_data_saved { + u8* buffer; + u64 size; + u64 id; + int cpu; +}; + +/* State transitions. Actions are :- + * Write "read " to the data file. + * Write "clear " to the data file. + * Write "oemdata to the data file. + * Read from the data file. + * Close the data file. + * + * Start state is NO_DATA. + * + * NO_DATA + * write "read " -> NO_DATA or LOG_RECORD. + * write "clear " -> NO_DATA or LOG_RECORD. + * write "oemdata -> return -EINVAL. + * read data -> return EOF. + * close -> unchanged. Free record areas. + * + * LOG_RECORD + * write "read " -> NO_DATA or LOG_RECORD. + * write "clear " -> NO_DATA or LOG_RECORD. + * write "oemdata -> format the oem data, goto OEMDATA. + * read data -> return the INIT/MCA/CMC/CPE record. + * close -> unchanged. Keep record areas. + * + * OEMDATA + * write "read " -> NO_DATA or LOG_RECORD. + * write "clear " -> NO_DATA or LOG_RECORD. + * write "oemdata -> format the oem data, goto OEMDATA. + * read data -> return the formatted oemdata. + * close -> unchanged. Keep record areas. + * + * Closing the data file does not change the state. This allows shell scripts + * to manipulate salinfo data, each shell redirection opens the file, does one + * action then closes it again. The record areas are only freed at close when + * the state is NO_DATA. + */ +enum salinfo_state { + STATE_NO_DATA, + STATE_LOG_RECORD, + STATE_OEMDATA, +}; + +struct salinfo_data { + cpumask_t cpu_event; /* which cpus have outstanding events */ + wait_queue_head_t read_wait; + u8 *log_buffer; + u64 log_size; + u8 *oemdata; /* decoded oem data */ + u64 oemdata_size; + int open; /* single-open to prevent races */ + u8 type; + u8 saved_num; /* using a saved record? */ + enum salinfo_state state :8; /* processing state */ + u8 padding; + int cpu_check; /* next CPU to check */ + struct salinfo_data_saved data_saved[5];/* save last 5 records from mca.c, must be < 255 */ +}; + +static struct salinfo_data salinfo_data[ARRAY_SIZE(salinfo_log_name)]; + +static DEFINE_SPINLOCK(data_lock); +static DEFINE_SPINLOCK(data_saved_lock); + +/** salinfo_platform_oemdata - optional callback to decode oemdata from an error + * record. + * @sect_header: pointer to the start of the section to decode. + * @oemdata: returns vmalloc area containing the decoded output. + * @oemdata_size: returns length of decoded output (strlen). + * + * Description: If user space asks for oem data to be decoded by the kernel + * and/or prom and the platform has set salinfo_platform_oemdata to the address + * of a platform specific routine then call that routine. salinfo_platform_oemdata + * vmalloc's and formats its output area, returning the address of the text + * and its strlen. Returns 0 for success, -ve for error. The callback is + * invoked on the cpu that generated the error record. + */ +int (*salinfo_platform_oemdata)(const u8 *sect_header, u8 **oemdata, u64 *oemdata_size); + +struct salinfo_platform_oemdata_parms { + const u8 *efi_guid; + u8 **oemdata; + u64 *oemdata_size; +}; + +static long +salinfo_platform_oemdata_cpu(void *context) +{ + struct salinfo_platform_oemdata_parms *parms = context; + + return salinfo_platform_oemdata(parms->efi_guid, parms->oemdata, parms->oemdata_size); +} + +static void +shift1_data_saved (struct salinfo_data *data, int shift) +{ + memcpy(data->data_saved+shift, data->data_saved+shift+1, + (ARRAY_SIZE(data->data_saved) - (shift+1)) * sizeof(data->data_saved[0])); + memset(data->data_saved + ARRAY_SIZE(data->data_saved) - 1, 0, + sizeof(data->data_saved[0])); +} + +/* This routine is invoked in interrupt context. Note: mca.c enables + * interrupts before calling this code for CMC/CPE. MCA and INIT events are + * not irq safe, do not call any routines that use spinlocks, they may deadlock. + * MCA and INIT records are recorded, a timer event will look for any + * outstanding events and wake up the user space code. + * + * The buffer passed from mca.c points to the output from ia64_log_get. This is + * a persistent buffer but its contents can change between the interrupt and + * when user space processes the record. Save the record id to identify + * changes. If the buffer is NULL then just update the bitmap. + */ +void +salinfo_log_wakeup(int type, u8 *buffer, u64 size, int irqsafe) +{ + struct salinfo_data *data = salinfo_data + type; + struct salinfo_data_saved *data_saved; + unsigned long flags = 0; + int i; + int saved_size = ARRAY_SIZE(data->data_saved); + + BUG_ON(type >= ARRAY_SIZE(salinfo_log_name)); + + if (irqsafe) + spin_lock_irqsave(&data_saved_lock, flags); + if (buffer) { + for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { + if (!data_saved->buffer) + break; + } + if (i == saved_size) { + if (!data->saved_num) { + shift1_data_saved(data, 0); + data_saved = data->data_saved + saved_size - 1; + } else + data_saved = NULL; + } + if (data_saved) { + data_saved->cpu = smp_processor_id(); + data_saved->id = ((sal_log_record_header_t *)buffer)->id; + data_saved->size = size; + data_saved->buffer = buffer; + } + } + cpumask_set_cpu(smp_processor_id(), &data->cpu_event); + if (irqsafe) { + wake_up_interruptible(&data->read_wait); + spin_unlock_irqrestore(&data_saved_lock, flags); + } +} + +/* Check for outstanding MCA/INIT records every minute (arbitrary) */ +#define SALINFO_TIMER_DELAY (60*HZ) +static struct timer_list salinfo_timer; +extern void ia64_mlogbuf_dump(void); + +static void +salinfo_timeout_check(struct salinfo_data *data) +{ + if (!data->open) + return; + if (!cpumask_empty(&data->cpu_event)) + wake_up_interruptible(&data->read_wait); +} + +static void +salinfo_timeout(struct timer_list *unused) +{ + ia64_mlogbuf_dump(); + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_MCA); + salinfo_timeout_check(salinfo_data + SAL_INFO_TYPE_INIT); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + add_timer(&salinfo_timer); +} + +static int +salinfo_event_open(struct inode *inode, struct file *file) +{ + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + return 0; +} + +static ssize_t +salinfo_event_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +{ + struct salinfo_data *data = pde_data(file_inode(file)); + char cmd[32]; + size_t size; + int i, n, cpu = -1; + +retry: + if (cpumask_empty(&data->cpu_event)) { + if (file->f_flags & O_NONBLOCK) + return -EAGAIN; + if (wait_event_interruptible(data->read_wait, + !cpumask_empty(&data->cpu_event))) + return -EINTR; + } + + n = data->cpu_check; + for (i = 0; i < nr_cpu_ids; i++) { + if (cpumask_test_cpu(n, &data->cpu_event)) { + if (!cpu_online(n)) { + cpumask_clear_cpu(n, &data->cpu_event); + continue; + } + cpu = n; + break; + } + if (++n == nr_cpu_ids) + n = 0; + } + + if (cpu == -1) + goto retry; + + ia64_mlogbuf_dump(); + + /* for next read, start checking at next CPU */ + data->cpu_check = cpu; + if (++data->cpu_check == nr_cpu_ids) + data->cpu_check = 0; + + snprintf(cmd, sizeof(cmd), "read %d\n", cpu); + + size = strlen(cmd); + if (size > count) + size = count; + if (copy_to_user(buffer, cmd, size)) + return -EFAULT; + + return size; +} + +static const struct proc_ops salinfo_event_proc_ops = { + .proc_open = salinfo_event_open, + .proc_read = salinfo_event_read, + .proc_lseek = noop_llseek, +}; + +static int +salinfo_log_open(struct inode *inode, struct file *file) +{ + struct salinfo_data *data = pde_data(inode); + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + spin_lock(&data_lock); + if (data->open) { + spin_unlock(&data_lock); + return -EBUSY; + } + data->open = 1; + spin_unlock(&data_lock); + + if (data->state == STATE_NO_DATA && + !(data->log_buffer = vmalloc(ia64_sal_get_state_info_size(data->type)))) { + data->open = 0; + return -ENOMEM; + } + + return 0; +} + +static int +salinfo_log_release(struct inode *inode, struct file *file) +{ + struct salinfo_data *data = pde_data(inode); + + if (data->state == STATE_NO_DATA) { + vfree(data->log_buffer); + vfree(data->oemdata); + data->log_buffer = NULL; + data->oemdata = NULL; + } + spin_lock(&data_lock); + data->open = 0; + spin_unlock(&data_lock); + return 0; +} + +static long +salinfo_log_read_cpu(void *context) +{ + struct salinfo_data *data = context; + sal_log_record_header_t *rh; + data->log_size = ia64_sal_get_state_info(data->type, (u64 *) data->log_buffer); + rh = (sal_log_record_header_t *)(data->log_buffer); + /* Clear corrected errors as they are read from SAL */ + if (rh->severity == sal_log_severity_corrected) + ia64_sal_clear_state_info(data->type); + return 0; +} + +static void +salinfo_log_new_read(int cpu, struct salinfo_data *data) +{ + struct salinfo_data_saved *data_saved; + unsigned long flags; + int i; + int saved_size = ARRAY_SIZE(data->data_saved); + + data->saved_num = 0; + spin_lock_irqsave(&data_saved_lock, flags); +retry: + for (i = 0, data_saved = data->data_saved; i < saved_size; ++i, ++data_saved) { + if (data_saved->buffer && data_saved->cpu == cpu) { + sal_log_record_header_t *rh = (sal_log_record_header_t *)(data_saved->buffer); + data->log_size = data_saved->size; + memcpy(data->log_buffer, rh, data->log_size); + barrier(); /* id check must not be moved */ + if (rh->id == data_saved->id) { + data->saved_num = i+1; + break; + } + /* saved record changed by mca.c since interrupt, discard it */ + shift1_data_saved(data, i); + goto retry; + } + } + spin_unlock_irqrestore(&data_saved_lock, flags); + + if (!data->saved_num) + work_on_cpu_safe(cpu, salinfo_log_read_cpu, data); + if (!data->log_size) { + data->state = STATE_NO_DATA; + cpumask_clear_cpu(cpu, &data->cpu_event); + } else { + data->state = STATE_LOG_RECORD; + } +} + +static ssize_t +salinfo_log_read(struct file *file, char __user *buffer, size_t count, loff_t *ppos) +{ + struct salinfo_data *data = pde_data(file_inode(file)); + u8 *buf; + u64 bufsize; + + if (data->state == STATE_LOG_RECORD) { + buf = data->log_buffer; + bufsize = data->log_size; + } else if (data->state == STATE_OEMDATA) { + buf = data->oemdata; + bufsize = data->oemdata_size; + } else { + buf = NULL; + bufsize = 0; + } + return simple_read_from_buffer(buffer, count, ppos, buf, bufsize); +} + +static long +salinfo_log_clear_cpu(void *context) +{ + struct salinfo_data *data = context; + + ia64_sal_clear_state_info(data->type); + return 0; +} + +static int +salinfo_log_clear(struct salinfo_data *data, int cpu) +{ + sal_log_record_header_t *rh; + unsigned long flags; + spin_lock_irqsave(&data_saved_lock, flags); + data->state = STATE_NO_DATA; + if (!cpumask_test_cpu(cpu, &data->cpu_event)) { + spin_unlock_irqrestore(&data_saved_lock, flags); + return 0; + } + cpumask_clear_cpu(cpu, &data->cpu_event); + if (data->saved_num) { + shift1_data_saved(data, data->saved_num - 1); + data->saved_num = 0; + } + spin_unlock_irqrestore(&data_saved_lock, flags); + rh = (sal_log_record_header_t *)(data->log_buffer); + /* Corrected errors have already been cleared from SAL */ + if (rh->severity != sal_log_severity_corrected) + work_on_cpu_safe(cpu, salinfo_log_clear_cpu, data); + /* clearing a record may make a new record visible */ + salinfo_log_new_read(cpu, data); + if (data->state == STATE_LOG_RECORD) { + spin_lock_irqsave(&data_saved_lock, flags); + cpumask_set_cpu(cpu, &data->cpu_event); + wake_up_interruptible(&data->read_wait); + spin_unlock_irqrestore(&data_saved_lock, flags); + } + return 0; +} + +static ssize_t +salinfo_log_write(struct file *file, const char __user *buffer, size_t count, loff_t *ppos) +{ + struct salinfo_data *data = pde_data(file_inode(file)); + char cmd[32]; + size_t size; + u32 offset; + int cpu; + + size = sizeof(cmd); + if (count < size) + size = count; + if (copy_from_user(cmd, buffer, size)) + return -EFAULT; + + if (sscanf(cmd, "read %d", &cpu) == 1) { + salinfo_log_new_read(cpu, data); + } else if (sscanf(cmd, "clear %d", &cpu) == 1) { + int ret; + if ((ret = salinfo_log_clear(data, cpu))) + count = ret; + } else if (sscanf(cmd, "oemdata %d %d", &cpu, &offset) == 2) { + if (data->state != STATE_LOG_RECORD && data->state != STATE_OEMDATA) + return -EINVAL; + if (offset > data->log_size - sizeof(efi_guid_t)) + return -EINVAL; + data->state = STATE_OEMDATA; + if (salinfo_platform_oemdata) { + struct salinfo_platform_oemdata_parms parms = { + .efi_guid = data->log_buffer + offset, + .oemdata = &data->oemdata, + .oemdata_size = &data->oemdata_size + }; + count = work_on_cpu_safe(cpu, salinfo_platform_oemdata_cpu, + &parms); + } else + data->oemdata_size = 0; + } else + return -EINVAL; + + return count; +} + +static const struct proc_ops salinfo_data_proc_ops = { + .proc_open = salinfo_log_open, + .proc_release = salinfo_log_release, + .proc_read = salinfo_log_read, + .proc_write = salinfo_log_write, + .proc_lseek = default_llseek, +}; + +static int salinfo_cpu_online(unsigned int cpu) +{ + unsigned int i, end = ARRAY_SIZE(salinfo_data); + struct salinfo_data *data; + + spin_lock_irq(&data_saved_lock); + for (i = 0, data = salinfo_data; i < end; ++i, ++data) { + cpumask_set_cpu(cpu, &data->cpu_event); + wake_up_interruptible(&data->read_wait); + } + spin_unlock_irq(&data_saved_lock); + return 0; +} + +static int salinfo_cpu_pre_down(unsigned int cpu) +{ + unsigned int i, end = ARRAY_SIZE(salinfo_data); + struct salinfo_data *data; + + spin_lock_irq(&data_saved_lock); + for (i = 0, data = salinfo_data; i < end; ++i, ++data) { + struct salinfo_data_saved *data_saved; + int j = ARRAY_SIZE(data->data_saved) - 1; + + for (data_saved = data->data_saved + j; j >= 0; + --j, --data_saved) { + if (data_saved->buffer && data_saved->cpu == cpu) + shift1_data_saved(data, j); + } + cpumask_clear_cpu(cpu, &data->cpu_event); + } + spin_unlock_irq(&data_saved_lock); + return 0; +} + +/* + * 'data' contains an integer that corresponds to the feature we're + * testing + */ +static int __maybe_unused proc_salinfo_show(struct seq_file *m, void *v) +{ + unsigned long data = (unsigned long)v; + seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); + return 0; +} + +static int __init +salinfo_init(void) +{ + struct proc_dir_entry *salinfo_dir; /* /proc/sal dir entry */ + struct proc_dir_entry **sdir = salinfo_proc_entries; /* keeps track of every entry */ + struct proc_dir_entry *dir, *entry; + struct salinfo_data *data; + int i; + + salinfo_dir = proc_mkdir("sal", NULL); + if (!salinfo_dir) + return 0; + + for (i=0; i < NR_SALINFO_ENTRIES; i++) { + /* pass the feature bit in question as misc data */ + *sdir++ = proc_create_single_data(salinfo_entries[i].name, 0, + salinfo_dir, proc_salinfo_show, + (void *)salinfo_entries[i].feature); + } + + for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { + data = salinfo_data + i; + data->type = i; + init_waitqueue_head(&data->read_wait); + dir = proc_mkdir(salinfo_log_name[i], salinfo_dir); + if (!dir) + continue; + + entry = proc_create_data("event", S_IRUSR, dir, + &salinfo_event_proc_ops, data); + if (!entry) + continue; + *sdir++ = entry; + + entry = proc_create_data("data", S_IRUSR | S_IWUSR, dir, + &salinfo_data_proc_ops, data); + if (!entry) + continue; + *sdir++ = entry; + + *sdir++ = dir; + } + + *sdir++ = salinfo_dir; + + timer_setup(&salinfo_timer, salinfo_timeout, 0); + salinfo_timer.expires = jiffies + SALINFO_TIMER_DELAY; + add_timer(&salinfo_timer); + + i = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/salinfo:online", + salinfo_cpu_online, salinfo_cpu_pre_down); + WARN_ON(i < 0); + return 0; +} + +module_init(salinfo_init); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c new file mode 100644 index 000000000000..f6d6a3b5d1f0 --- /dev/null +++ b/arch/ia64/kernel/setup.c @@ -0,0 +1,1083 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Architecture-specific setup. + * + * Copyright (C) 1998-2001, 2003-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2000, 2004 Intel Corp + * Rohit Seth + * Suresh Siddha + * Gordon Jin + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * + * 12/26/04 S.Siddha, G.Jin, R.Seth + * Add multi-threading and multi-core detection + * 11/12/01 D.Mosberger Convert get_cpuinfo() to seq_file based show_cpuinfo(). + * 04/04/00 D.Mosberger renamed cpu_initialized to cpu_online_map + * 03/31/00 R.Seth cpu_initialized and current->processor fixes + * 02/04/00 D.Mosberger some more get_cpuinfo fixes... + * 02/01/00 R.Seth fixed get_cpuinfo for SMP + * 01/07/99 S.Eranian added the support for command line argument + * 06/24/99 W.Drummond added boot_cpu_data. + * 05/28/05 Z. Menyhart Dynamic stride size for "flush_icache_range()" + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(CONFIG_SMP) && (IA64_CPU_SIZE > PAGE_SIZE) +# error "struct cpuinfo_ia64 too big!" +#endif + +char ia64_platform_name[64]; + +#ifdef CONFIG_SMP +unsigned long __per_cpu_offset[NR_CPUS]; +EXPORT_SYMBOL(__per_cpu_offset); +#endif + +DEFINE_PER_CPU(struct cpuinfo_ia64, ia64_cpu_info); +EXPORT_SYMBOL(ia64_cpu_info); +DEFINE_PER_CPU(unsigned long, local_per_cpu_offset); +#ifdef CONFIG_SMP +EXPORT_SYMBOL(local_per_cpu_offset); +#endif +unsigned long ia64_cycles_per_usec; +struct ia64_boot_param *ia64_boot_param; +struct screen_info screen_info; +unsigned long vga_console_iobase; +unsigned long vga_console_membase; + +static struct resource data_resource = { + .name = "Kernel data", + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource code_resource = { + .name = "Kernel code", + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +static struct resource bss_resource = { + .name = "Kernel bss", + .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM +}; + +unsigned long ia64_max_cacheline_size; + +unsigned long ia64_iobase; /* virtual address for I/O accesses */ +EXPORT_SYMBOL(ia64_iobase); +struct io_space io_space[MAX_IO_SPACES]; +EXPORT_SYMBOL(io_space); +unsigned int num_io_spaces; + +/* + * "flush_icache_range()" needs to know what processor dependent stride size to use + * when it makes i-cache(s) coherent with d-caches. + */ +#define I_CACHE_STRIDE_SHIFT 5 /* Safest way to go: 32 bytes by 32 bytes */ +unsigned long ia64_i_cache_stride_shift = ~0; +/* + * "clflush_cache_range()" needs to know what processor dependent stride size to + * use when it flushes cache lines including both d-cache and i-cache. + */ +/* Safest way to go: 32 bytes by 32 bytes */ +#define CACHE_STRIDE_SHIFT 5 +unsigned long ia64_cache_stride_shift = ~0; + +/* + * We use a special marker for the end of memory and it uses the extra (+1) slot + */ +struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; +static int num_rsvd_regions __initdata; + + +/* + * Filter incoming memory segments based on the primitive map created from the boot + * parameters. Segments contained in the map are removed from the memory ranges. A + * caller-specified function is called with the memory ranges that remain after filtering. + * This routine does not assume the incoming segments are sorted. + */ +int __init +filter_rsvd_memory (u64 start, u64 end, void *arg) +{ + u64 range_start, range_end, prev_start; + void (*func)(unsigned long, unsigned long, int); + int i; + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) return 0; + } +#endif + /* + * lowest possible address(walker uses virtual) + */ + prev_start = PAGE_OFFSET; + func = arg; + + for (i = 0; i < num_rsvd_regions; ++i) { + range_start = max(start, prev_start); + range_end = min(end, rsvd_region[i].start); + + if (range_start < range_end) + call_pernode_memory(__pa(range_start), range_end - range_start, func); + + /* nothing more available in this segment */ + if (range_end == end) return 0; + + prev_start = rsvd_region[i].end; + } + /* end of memory marker allows full processing inside loop body */ + return 0; +} + +/* + * Similar to "filter_rsvd_memory()", but the reserved memory ranges + * are not filtered out. + */ +int __init +filter_memory(u64 start, u64 end, void *arg) +{ + void (*func)(unsigned long, unsigned long, int); + +#if IGNORE_PFN0 + if (start == PAGE_OFFSET) { + printk(KERN_WARNING "warning: skipping physical page 0\n"); + start += PAGE_SIZE; + if (start >= end) + return 0; + } +#endif + func = arg; + if (start < end) + call_pernode_memory(__pa(start), end - start, func); + return 0; +} + +static void __init +sort_regions (struct rsvd_region *rsvd_region, int max) +{ + int j; + + /* simple bubble sorting */ + while (max--) { + for (j = 0; j < max; ++j) { + if (rsvd_region[j].start > rsvd_region[j+1].start) { + swap(rsvd_region[j], rsvd_region[j + 1]); + } + } + } +} + +/* merge overlaps */ +static int __init +merge_regions (struct rsvd_region *rsvd_region, int max) +{ + int i; + for (i = 1; i < max; ++i) { + if (rsvd_region[i].start >= rsvd_region[i-1].end) + continue; + if (rsvd_region[i].end > rsvd_region[i-1].end) + rsvd_region[i-1].end = rsvd_region[i].end; + --max; + memmove(&rsvd_region[i], &rsvd_region[i+1], + (max - i) * sizeof(struct rsvd_region)); + } + return max; +} + +/* + * Request address space for all standard resources + */ +static int __init register_memory(void) +{ + code_resource.start = ia64_tpa(_text); + code_resource.end = ia64_tpa(_etext) - 1; + data_resource.start = ia64_tpa(_etext); + data_resource.end = ia64_tpa(_edata) - 1; + bss_resource.start = ia64_tpa(__bss_start); + bss_resource.end = ia64_tpa(_end) - 1; + efi_initialize_iomem_resources(&code_resource, &data_resource, + &bss_resource); + + return 0; +} + +__initcall(register_memory); + + +#ifdef CONFIG_KEXEC + +/* + * This function checks if the reserved crashkernel is allowed on the specific + * IA64 machine flavour. Machines without an IO TLB use swiotlb and require + * some memory below 4 GB (i.e. in 32 bit area), see the implementation of + * kernel/dma/swiotlb.c. The hpzx1 architecture has an IO TLB but cannot use that + * in kdump case. See the comment in sba_init() in sba_iommu.c. + * + * So, the only machvec that really supports loading the kdump kernel + * over 4 GB is "uv". + */ +static int __init check_crashkernel_memory(unsigned long pbase, size_t size) +{ + if (is_uv_system()) + return 1; + else + return pbase < (1UL << 32); +} + +static void __init setup_crashkernel(unsigned long total, int *n) +{ + unsigned long long base = 0, size = 0; + int ret; + + ret = parse_crashkernel(boot_command_line, total, + &size, &base, NULL, NULL); + if (ret == 0 && size > 0) { + if (!base) { + sort_regions(rsvd_region, *n); + *n = merge_regions(rsvd_region, *n); + base = kdump_find_rsvd_region(size, + rsvd_region, *n); + } + + if (!check_crashkernel_memory(base, size)) { + pr_warn("crashkernel: There would be kdump memory " + "at %ld GB but this is unusable because it " + "must\nbe below 4 GB. Change the memory " + "configuration of the machine.\n", + (unsigned long)(base >> 30)); + return; + } + + if (base != ~0UL) { + printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " + "for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(size >> 20), + (unsigned long)(base >> 20), + (unsigned long)(total >> 20)); + rsvd_region[*n].start = + (unsigned long)__va(base); + rsvd_region[*n].end = + (unsigned long)__va(base + size); + (*n)++; + crashk_res.start = base; + crashk_res.end = base + size - 1; + } + } + efi_memmap_res.start = ia64_boot_param->efi_memmap; + efi_memmap_res.end = efi_memmap_res.start + + ia64_boot_param->efi_memmap_size; + boot_param_res.start = __pa(ia64_boot_param); + boot_param_res.end = boot_param_res.start + + sizeof(*ia64_boot_param); +} +#else +static inline void __init setup_crashkernel(unsigned long total, int *n) +{} +#endif + +#ifdef CONFIG_CRASH_DUMP +static int __init reserve_elfcorehdr(u64 *start, u64 *end) +{ + u64 length; + + /* We get the address using the kernel command line, + * but the size is extracted from the EFI tables. + * Both address and size are required for reservation + * to work properly. + */ + + if (!is_vmcore_usable()) + return -EINVAL; + + if ((length = vmcore_find_descriptor_size(elfcorehdr_addr)) == 0) { + vmcore_unusable(); + return -EINVAL; + } + + *start = (unsigned long)__va(elfcorehdr_addr); + *end = *start + length; + return 0; +} +#endif /* CONFIG_CRASH_DUMP */ + +/** + * reserve_memory - setup reserved memory areas + * + * Setup the reserved memory areas set aside for the boot parameters, + * initrd, etc. There are currently %IA64_MAX_RSVD_REGIONS defined, + * see arch/ia64/include/asm/meminit.h if you need to define more. + */ +void __init +reserve_memory (void) +{ + int n = 0; + unsigned long total_memory; + + /* + * none of the entries in this table overlap + */ + rsvd_region[n].start = (unsigned long) ia64_boot_param; + rsvd_region[n].end = rsvd_region[n].start + sizeof(*ia64_boot_param); + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->efi_memmap); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->efi_memmap_size; + n++; + + rsvd_region[n].start = (unsigned long) __va(ia64_boot_param->command_line); + rsvd_region[n].end = (rsvd_region[n].start + + strlen(__va(ia64_boot_param->command_line)) + 1); + n++; + + rsvd_region[n].start = (unsigned long) ia64_imva((void *)KERNEL_START); + rsvd_region[n].end = (unsigned long) ia64_imva(_end); + n++; + +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + rsvd_region[n].start = (unsigned long)__va(ia64_boot_param->initrd_start); + rsvd_region[n].end = rsvd_region[n].start + ia64_boot_param->initrd_size; + n++; + } +#endif + +#ifdef CONFIG_CRASH_DUMP + if (reserve_elfcorehdr(&rsvd_region[n].start, + &rsvd_region[n].end) == 0) + n++; +#endif + + total_memory = efi_memmap_init(&rsvd_region[n].start, &rsvd_region[n].end); + n++; + + setup_crashkernel(total_memory, &n); + + /* end of memory marker */ + rsvd_region[n].start = ~0UL; + rsvd_region[n].end = ~0UL; + n++; + + num_rsvd_regions = n; + BUG_ON(IA64_MAX_RSVD_REGIONS + 1 < n); + + sort_regions(rsvd_region, num_rsvd_regions); + num_rsvd_regions = merge_regions(rsvd_region, num_rsvd_regions); + + /* reserve all regions except the end of memory marker with memblock */ + for (n = 0; n < num_rsvd_regions - 1; n++) { + struct rsvd_region *region = &rsvd_region[n]; + phys_addr_t addr = __pa(region->start); + phys_addr_t size = region->end - region->start; + + memblock_reserve(addr, size); + } + + memblock_set_node(0, PHYS_ADDR_MAX, &memblock.reserved, 0); +} + +/** + * find_initrd - get initrd parameters from the boot parameter structure + * + * Grab the initrd start and end from the boot parameter struct given us by + * the boot loader. + */ +void __init +find_initrd (void) +{ +#ifdef CONFIG_BLK_DEV_INITRD + if (ia64_boot_param->initrd_start) { + initrd_start = (unsigned long)__va(ia64_boot_param->initrd_start); + initrd_end = initrd_start+ia64_boot_param->initrd_size; + + printk(KERN_INFO "Initial ramdisk at: 0x%lx (%llu bytes)\n", + initrd_start, ia64_boot_param->initrd_size); + } +#endif +} + +static void __init +io_port_init (void) +{ + unsigned long phys_iobase; + + /* + * Set `iobase' based on the EFI memory map or, failing that, the + * value firmware left in ar.k0. + * + * Note that in ia32 mode, IN/OUT instructions use ar.k0 to compute + * the port's virtual address, so ia32_load_state() loads it with a + * user virtual address. But in ia64 mode, glibc uses the + * *physical* address in ar.k0 to mmap the appropriate area from + * /dev/mem, and the inX()/outX() interfaces use MMIO. In both + * cases, user-mode can only use the legacy 0-64K I/O port space. + * + * ar.k0 is not involved in kernel I/O port accesses, which can use + * any of the I/O port spaces and are done via MMIO using the + * virtual mmio_base from the appropriate io_space[]. + */ + phys_iobase = efi_get_iobase(); + if (!phys_iobase) { + phys_iobase = ia64_get_kr(IA64_KR_IO_BASE); + printk(KERN_INFO "No I/O port range found in EFI memory map, " + "falling back to AR.KR0 (0x%lx)\n", phys_iobase); + } + ia64_iobase = (unsigned long) ioremap(phys_iobase, 0); + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); + + /* setup legacy IO port space */ + io_space[0].mmio_base = ia64_iobase; + io_space[0].sparse = 1; + num_io_spaces = 1; +} + +/** + * early_console_setup - setup debugging console + * + * Consoles started here require little enough setup that we can start using + * them very early in the boot process, either right after the machine + * vector initialization, or even before if the drivers can detect their hw. + * + * Returns non-zero if a console couldn't be setup. + */ +static inline int __init +early_console_setup (char *cmdline) +{ +#ifdef CONFIG_EFI_PCDP + if (!efi_setup_pcdp_console(cmdline)) + return 0; +#endif + return -1; +} + +static void __init +screen_info_setup(void) +{ + unsigned int orig_x, orig_y, num_cols, num_rows, font_height; + + memset(&screen_info, 0, sizeof(screen_info)); + + if (!ia64_boot_param->console_info.num_rows || + !ia64_boot_param->console_info.num_cols) { + printk(KERN_WARNING "invalid screen-info, guessing 80x25\n"); + orig_x = 0; + orig_y = 0; + num_cols = 80; + num_rows = 25; + font_height = 16; + } else { + orig_x = ia64_boot_param->console_info.orig_x; + orig_y = ia64_boot_param->console_info.orig_y; + num_cols = ia64_boot_param->console_info.num_cols; + num_rows = ia64_boot_param->console_info.num_rows; + font_height = 400 / num_rows; + } + + screen_info.orig_x = orig_x; + screen_info.orig_y = orig_y; + screen_info.orig_video_cols = num_cols; + screen_info.orig_video_lines = num_rows; + screen_info.orig_video_points = font_height; + screen_info.orig_video_mode = 3; /* XXX fake */ + screen_info.orig_video_isVGA = 1; /* XXX fake */ + screen_info.orig_video_ega_bx = 3; /* XXX fake */ +} + +static inline void +mark_bsp_online (void) +{ +#ifdef CONFIG_SMP + /* If we register an early console, allow CPU 0 to printk */ + set_cpu_online(smp_processor_id(), true); +#endif +} + +static __initdata int nomca; +static __init int setup_nomca(char *s) +{ + nomca = 1; + return 0; +} +early_param("nomca", setup_nomca); + +void __init +setup_arch (char **cmdline_p) +{ + unw_init(); + + ia64_patch_vtop((u64) __start___vtop_patchlist, (u64) __end___vtop_patchlist); + + *cmdline_p = __va(ia64_boot_param->command_line); + strscpy(boot_command_line, *cmdline_p, COMMAND_LINE_SIZE); + + efi_init(); + io_port_init(); + + uv_probe_system_type(); + parse_early_param(); + + if (early_console_setup(*cmdline_p) == 0) + mark_bsp_online(); + + /* Initialize the ACPI boot-time table parser */ + acpi_table_init(); + early_acpi_boot_init(); +#ifdef CONFIG_ACPI_NUMA + acpi_numa_init(); + acpi_numa_fixup(); +#ifdef CONFIG_ACPI_HOTPLUG_CPU + prefill_possible_map(); +#endif + per_cpu_scan_finalize((cpumask_empty(&early_cpu_possible_map) ? + 32 : cpumask_weight(&early_cpu_possible_map)), + additional_cpus > 0 ? additional_cpus : 0); +#endif /* CONFIG_ACPI_NUMA */ + +#ifdef CONFIG_SMP + smp_build_cpu_map(); +#endif + find_memory(); + + /* process SAL system table: */ + ia64_sal_init(__va(sal_systab_phys)); + +#ifdef CONFIG_ITANIUM + ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); +#else + { + unsigned long num_phys_stacked; + + if (ia64_pal_rse_info(&num_phys_stacked, 0) == 0 && num_phys_stacked > 96) + ia64_patch_rse((u64) __start___rse_patchlist, (u64) __end___rse_patchlist); + } +#endif + +#ifdef CONFIG_SMP + cpu_physical_id(0) = hard_smp_processor_id(); +#endif + + cpu_init(); /* initialize the bootstrap CPU */ + mmu_context_init(); /* initialize context_id bitmap */ + +#ifdef CONFIG_VT + if (!conswitchp) { +# if defined(CONFIG_VGA_CONSOLE) + /* + * Non-legacy systems may route legacy VGA MMIO range to system + * memory. vga_con probes the MMIO hole, so memory looks like + * a VGA device to it. The EFI memory map can tell us if it's + * memory so we can avoid this problem. + */ + if (efi_mem_type(0xA0000) != EFI_CONVENTIONAL_MEMORY) + conswitchp = &vga_con; +# endif + } +#endif + + /* enable IA-64 Machine Check Abort Handling unless disabled */ + if (!nomca) + ia64_mca_init(); + + /* + * Default to /dev/sda2. This assumes that the EFI partition + * is physical disk 1 partition 1 and the Linux root disk is + * physical disk 1 partition 2. + */ + ROOT_DEV = MKDEV(SCSI_DISK0_MAJOR, 2); + + if (is_uv_system()) + uv_setup(cmdline_p); +#ifdef CONFIG_SMP + else + init_smp_config(); +#endif + + screen_info_setup(); + paging_init(); + + clear_sched_clock_stable(); +} + +/* + * Display cpu info for all CPUs. + */ +static int +show_cpuinfo (struct seq_file *m, void *v) +{ +#ifdef CONFIG_SMP +# define lpj c->loops_per_jiffy +# define cpunum c->cpu +#else +# define lpj loops_per_jiffy +# define cpunum 0 +#endif + static struct { + unsigned long mask; + const char *feature_name; + } feature_bits[] = { + { 1UL << 0, "branchlong" }, + { 1UL << 1, "spontaneous deferral"}, + { 1UL << 2, "16-byte atomic ops" } + }; + char features[128], *cp, *sep; + struct cpuinfo_ia64 *c = v; + unsigned long mask; + unsigned long proc_freq; + int i, size; + + mask = c->features; + + /* build the feature string: */ + memcpy(features, "standard", 9); + cp = features; + size = sizeof(features); + sep = ""; + for (i = 0; i < ARRAY_SIZE(feature_bits) && size > 1; ++i) { + if (mask & feature_bits[i].mask) { + cp += snprintf(cp, size, "%s%s", sep, + feature_bits[i].feature_name), + sep = ", "; + mask &= ~feature_bits[i].mask; + size = sizeof(features) - (cp - features); + } + } + if (mask && size > 1) { + /* print unknown features as a hex value */ + snprintf(cp, size, "%s0x%lx", sep, mask); + } + + proc_freq = cpufreq_quick_get(cpunum); + if (!proc_freq) + proc_freq = c->proc_freq / 1000; + + seq_printf(m, + "processor : %d\n" + "vendor : %s\n" + "arch : IA-64\n" + "family : %u\n" + "model : %u\n" + "model name : %s\n" + "revision : %u\n" + "archrev : %u\n" + "features : %s\n" + "cpu number : %lu\n" + "cpu regs : %u\n" + "cpu MHz : %lu.%03lu\n" + "itc MHz : %lu.%06lu\n" + "BogoMIPS : %lu.%02lu\n", + cpunum, c->vendor, c->family, c->model, + c->model_name, c->revision, c->archrev, + features, c->ppn, c->number, + proc_freq / 1000, proc_freq % 1000, + c->itc_freq / 1000000, c->itc_freq % 1000000, + lpj*HZ/500000, (lpj*HZ/5000) % 100); +#ifdef CONFIG_SMP + seq_printf(m, "siblings : %u\n", + cpumask_weight(&cpu_core_map[cpunum])); + if (c->socket_id != -1) + seq_printf(m, "physical id: %u\n", c->socket_id); + if (c->threads_per_core > 1 || c->cores_per_socket > 1) + seq_printf(m, + "core id : %u\n" + "thread id : %u\n", + c->core_id, c->thread_id); +#endif + seq_printf(m,"\n"); + + return 0; +} + +static void * +c_start (struct seq_file *m, loff_t *pos) +{ +#ifdef CONFIG_SMP + while (*pos < nr_cpu_ids && !cpu_online(*pos)) + ++*pos; +#endif + return *pos < nr_cpu_ids ? cpu_data(*pos) : NULL; +} + +static void * +c_next (struct seq_file *m, void *v, loff_t *pos) +{ + ++*pos; + return c_start(m, pos); +} + +static void +c_stop (struct seq_file *m, void *v) +{ +} + +const struct seq_operations cpuinfo_op = { + .start = c_start, + .next = c_next, + .stop = c_stop, + .show = show_cpuinfo +}; + +#define MAX_BRANDS 8 +static char brandname[MAX_BRANDS][128]; + +static char * +get_model_name(__u8 family, __u8 model) +{ + static int overflow; + char brand[128]; + int i; + + memcpy(brand, "Unknown", 8); + if (ia64_pal_get_brand_info(brand)) { + if (family == 0x7) + memcpy(brand, "Merced", 7); + else if (family == 0x1f) switch (model) { + case 0: memcpy(brand, "McKinley", 9); break; + case 1: memcpy(brand, "Madison", 8); break; + case 2: memcpy(brand, "Madison up to 9M cache", 23); break; + } + } + for (i = 0; i < MAX_BRANDS; i++) + if (strcmp(brandname[i], brand) == 0) + return brandname[i]; + for (i = 0; i < MAX_BRANDS; i++) + if (brandname[i][0] == '\0') + return strcpy(brandname[i], brand); + if (overflow++ == 0) + printk(KERN_ERR + "%s: Table overflow. Some processor model information will be missing\n", + __func__); + return "Unknown"; +} + +static void +identify_cpu (struct cpuinfo_ia64 *c) +{ + union { + unsigned long bits[5]; + struct { + /* id 0 & 1: */ + char vendor[16]; + + /* id 2 */ + u64 ppn; /* processor serial number */ + + /* id 3: */ + unsigned number : 8; + unsigned revision : 8; + unsigned model : 8; + unsigned family : 8; + unsigned archrev : 8; + unsigned reserved : 24; + + /* id 4: */ + u64 features; + } field; + } cpuid; + pal_vm_info_1_u_t vm1; + pal_vm_info_2_u_t vm2; + pal_status_t status; + unsigned long impl_va_msb = 50, phys_addr_size = 44; /* Itanium defaults */ + int i; + for (i = 0; i < 5; ++i) + cpuid.bits[i] = ia64_get_cpuid(i); + + memcpy(c->vendor, cpuid.field.vendor, 16); +#ifdef CONFIG_SMP + c->cpu = smp_processor_id(); + + /* below default values will be overwritten by identify_siblings() + * for Multi-Threading/Multi-Core capable CPUs + */ + c->threads_per_core = c->cores_per_socket = c->num_log = 1; + c->socket_id = -1; + + identify_siblings(c); + + if (c->threads_per_core > smp_num_siblings) + smp_num_siblings = c->threads_per_core; +#endif + c->ppn = cpuid.field.ppn; + c->number = cpuid.field.number; + c->revision = cpuid.field.revision; + c->model = cpuid.field.model; + c->family = cpuid.field.family; + c->archrev = cpuid.field.archrev; + c->features = cpuid.field.features; + c->model_name = get_model_name(c->family, c->model); + + status = ia64_pal_vm_summary(&vm1, &vm2); + if (status == PAL_STATUS_SUCCESS) { + impl_va_msb = vm2.pal_vm_info_2_s.impl_va_msb; + phys_addr_size = vm1.pal_vm_info_1_s.phys_add_size; + } + c->unimpl_va_mask = ~((7L<<61) | ((1L << (impl_va_msb + 1)) - 1)); + c->unimpl_pa_mask = ~((1L<<63) | ((1L << phys_addr_size) - 1)); +} + +/* + * Do the following calculations: + * + * 1. the max. cache line size. + * 2. the minimum of the i-cache stride sizes for "flush_icache_range()". + * 3. the minimum of the cache stride sizes for "clflush_cache_range()". + */ +static void +get_cache_info(void) +{ + unsigned long line_size, max = 1; + unsigned long l, levels, unique_caches; + pal_cache_config_info_t cci; + long status; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_summary() failed (status=%ld)\n", + __func__, status); + max = SMP_CACHE_BYTES; + /* Safest setup for "flush_icache_range()" */ + ia64_i_cache_stride_shift = I_CACHE_STRIDE_SHIFT; + /* Safest setup for "clflush_cache_range()" */ + ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; + goto out; + } + + for (l = 0; l < levels; ++l) { + /* cache_type (data_or_unified)=2 */ + status = ia64_pal_cache_config_info(l, 2, &cci); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_config_info" + "(l=%lu, 2) failed (status=%ld)\n", + __func__, l, status); + max = SMP_CACHE_BYTES; + /* The safest setup for "flush_icache_range()" */ + cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + /* The safest setup for "clflush_cache_range()" */ + ia64_cache_stride_shift = CACHE_STRIDE_SHIFT; + cci.pcci_unified = 1; + } else { + if (cci.pcci_stride < ia64_cache_stride_shift) + ia64_cache_stride_shift = cci.pcci_stride; + + line_size = 1 << cci.pcci_line_size; + if (line_size > max) + max = line_size; + } + + if (!cci.pcci_unified) { + /* cache_type (instruction)=1*/ + status = ia64_pal_cache_config_info(l, 1, &cci); + if (status != 0) { + printk(KERN_ERR "%s: ia64_pal_cache_config_info" + "(l=%lu, 1) failed (status=%ld)\n", + __func__, l, status); + /* The safest setup for flush_icache_range() */ + cci.pcci_stride = I_CACHE_STRIDE_SHIFT; + } + } + if (cci.pcci_stride < ia64_i_cache_stride_shift) + ia64_i_cache_stride_shift = cci.pcci_stride; + } + out: + if (max > ia64_max_cacheline_size) + ia64_max_cacheline_size = max; +} + +/* + * cpu_init() initializes state that is per-CPU. This function acts + * as a 'CPU state barrier', nothing should get across. + */ +void +cpu_init (void) +{ + extern void ia64_mmu_init(void *); + static unsigned long max_num_phys_stacked = IA64_NUM_PHYS_STACK_REG; + unsigned long num_phys_stacked; + pal_vm_info_2_u_t vmi; + unsigned int max_ctx; + struct cpuinfo_ia64 *cpu_info; + void *cpu_data; + + cpu_data = per_cpu_init(); +#ifdef CONFIG_SMP + /* + * insert boot cpu into sibling and core mapes + * (must be done after per_cpu area is setup) + */ + if (smp_processor_id() == 0) { + cpumask_set_cpu(0, &per_cpu(cpu_sibling_map, 0)); + cpumask_set_cpu(0, &cpu_core_map[0]); + } else { + /* + * Set ar.k3 so that assembly code in MCA handler can compute + * physical addresses of per cpu variables with a simple: + * phys = ar.k3 + &per_cpu_var + * and the alt-dtlb-miss handler can set per-cpu mapping into + * the TLB when needed. head.S already did this for cpu0. + */ + ia64_set_kr(IA64_KR_PER_CPU_DATA, + ia64_tpa(cpu_data) - (long) __per_cpu_start); + } +#endif + + get_cache_info(); + + /* + * We can't pass "local_cpu_data" to identify_cpu() because we haven't called + * ia64_mmu_init() yet. And we can't call ia64_mmu_init() first because it + * depends on the data returned by identify_cpu(). We break the dependency by + * accessing cpu_data() through the canonical per-CPU address. + */ + cpu_info = cpu_data + ((char *) &__ia64_per_cpu_var(ia64_cpu_info) - __per_cpu_start); + identify_cpu(cpu_info); + +#ifdef CONFIG_MCKINLEY + { +# define FEATURE_SET 16 + struct ia64_pal_retval iprv; + + if (cpu_info->family == 0x1f) { + PAL_CALL_PHYS(iprv, PAL_PROC_GET_FEATURES, 0, FEATURE_SET, 0); + if ((iprv.status == 0) && (iprv.v0 & 0x80) && (iprv.v2 & 0x80)) + PAL_CALL_PHYS(iprv, PAL_PROC_SET_FEATURES, + (iprv.v1 | 0x80), FEATURE_SET, 0); + } + } +#endif + + /* Clear the stack memory reserved for pt_regs: */ + memset(task_pt_regs(current), 0, sizeof(struct pt_regs)); + + ia64_set_kr(IA64_KR_FPU_OWNER, 0); + + /* + * Initialize the page-table base register to a global + * directory with all zeroes. This ensure that we can handle + * TLB-misses to user address-space even before we created the + * first user address-space. This may happen, e.g., due to + * aggressive use of lfetch.fault. + */ + ia64_set_kr(IA64_KR_PT_BASE, __pa(ia64_imva(empty_zero_page))); + + /* + * Initialize default control register to defer speculative faults except + * for those arising from TLB misses, which are not deferred. The + * kernel MUST NOT depend on a particular setting of these bits (in other words, + * the kernel must have recovery code for all speculative accesses). Turn on + * dcr.lc as per recommendation by the architecture team. Most IA-32 apps + * shouldn't be affected by this (moral: keep your ia32 locks aligned and you'll + * be fine). + */ + ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR + | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); + mmgrab(&init_mm); + current->active_mm = &init_mm; + BUG_ON(current->mm); + + ia64_mmu_init(ia64_imva(cpu_data)); + ia64_mca_cpu_init(ia64_imva(cpu_data)); + + /* Clear ITC to eliminate sched_clock() overflows in human time. */ + ia64_set_itc(0); + + /* disable all local interrupt sources: */ + ia64_set_itv(1 << 16); + ia64_set_lrr0(1 << 16); + ia64_set_lrr1(1 << 16); + ia64_setreg(_IA64_REG_CR_PMV, 1 << 16); + ia64_setreg(_IA64_REG_CR_CMCV, 1 << 16); + + /* clear TPR & XTP to enable all interrupt classes: */ + ia64_setreg(_IA64_REG_CR_TPR, 0); + + /* Clear any pending interrupts left by SAL/EFI */ + while (ia64_get_ivr() != IA64_SPURIOUS_INT_VECTOR) + ia64_eoi(); + +#ifdef CONFIG_SMP + normal_xtp(); +#endif + + /* set ia64_ctx.max_rid to the maximum RID that is supported by all CPUs: */ + if (ia64_pal_vm_summary(NULL, &vmi) == 0) { + max_ctx = (1U << (vmi.pal_vm_info_2_s.rid_size - 3)) - 1; + setup_ptcg_sem(vmi.pal_vm_info_2_s.max_purges, NPTCG_FROM_PAL); + } else { + printk(KERN_WARNING "cpu_init: PAL VM summary failed, assuming 18 RID bits\n"); + max_ctx = (1U << 15) - 1; /* use architected minimum */ + } + while (max_ctx < ia64_ctx.max_ctx) { + unsigned int old = ia64_ctx.max_ctx; + if (cmpxchg(&ia64_ctx.max_ctx, old, max_ctx) == old) + break; + } + + if (ia64_pal_rse_info(&num_phys_stacked, NULL) != 0) { + printk(KERN_WARNING "cpu_init: PAL RSE info failed; assuming 96 physical " + "stacked regs\n"); + num_phys_stacked = 96; + } + /* size of physical stacked register partition plus 8 bytes: */ + if (num_phys_stacked > max_num_phys_stacked) { + ia64_patch_phys_stack_reg(num_phys_stacked*8 + 8); + max_num_phys_stacked = num_phys_stacked; + } +} + +void __init arch_cpu_finalize_init(void) +{ + ia64_patch_mckinley_e9((unsigned long) __start___mckinley_e9_bundles, + (unsigned long) __end___mckinley_e9_bundles); +} + +static int __init run_dmi_scan(void) +{ + dmi_setup(); + return 0; +} +core_initcall(run_dmi_scan); diff --git a/arch/ia64/kernel/sigframe.h b/arch/ia64/kernel/sigframe.h new file mode 100644 index 000000000000..58a36ce6c26e --- /dev/null +++ b/arch/ia64/kernel/sigframe.h @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +struct sigscratch { + unsigned long scratch_unat; /* ar.unat for the general registers saved in pt */ + unsigned long ar_pfs; /* for syscalls, the user-level function-state */ + struct pt_regs pt; +}; + +struct sigframe { + /* + * Place signal handler args where user-level unwinder can find them easily. + * DO NOT MOVE THESE. They are part of the IA-64 Linux ABI and there is + * user-level code that depends on their presence! + */ + unsigned long arg0; /* signum */ + unsigned long arg1; /* siginfo pointer */ + unsigned long arg2; /* sigcontext pointer */ + /* + * End of architected state. + */ + + void __user *handler; /* pointer to the plabel of the signal handler */ + struct siginfo info; + struct sigcontext sc; +}; + +extern void ia64_do_signal (struct sigscratch *, long); diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c new file mode 100644 index 000000000000..51cf6a7ec158 --- /dev/null +++ b/arch/ia64/kernel/signal.c @@ -0,0 +1,412 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Architecture-specific signal handling support. + * + * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang + * + * Derived from i386 and Alpha versions. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "sigframe.h" + +#define DEBUG_SIG 0 +#define STACK_ALIGN 16 /* minimal alignment for stack pointer */ + +#if _NSIG_WORDS > 1 +# define PUT_SIGSET(k,u) __copy_to_user((u)->sig, (k)->sig, sizeof(sigset_t)) +# define GET_SIGSET(k,u) __copy_from_user((k)->sig, (u)->sig, sizeof(sigset_t)) +#else +# define PUT_SIGSET(k,u) __put_user((k)->sig[0], &(u)->sig[0]) +# define GET_SIGSET(k,u) __get_user((k)->sig[0], &(u)->sig[0]) +#endif + +static long +restore_sigcontext (struct sigcontext __user *sc, struct sigscratch *scr) +{ + unsigned long ip, flags, nat, um, cfm, rsc; + long err; + + /* Always make any pending restarted system calls return -EINTR */ + current->restart_block.fn = do_no_restart_syscall; + + /* restore scratch that always needs gets updated during signal delivery: */ + err = __get_user(flags, &sc->sc_flags); + err |= __get_user(nat, &sc->sc_nat); + err |= __get_user(ip, &sc->sc_ip); /* instruction pointer */ + err |= __get_user(cfm, &sc->sc_cfm); + err |= __get_user(um, &sc->sc_um); /* user mask */ + err |= __get_user(rsc, &sc->sc_ar_rsc); + err |= __get_user(scr->pt.ar_unat, &sc->sc_ar_unat); + err |= __get_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); + err |= __get_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); + err |= __get_user(scr->pt.pr, &sc->sc_pr); /* predicates */ + err |= __get_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __get_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ + err |= __copy_from_user(&scr->pt.r1, &sc->sc_gr[1], 8); /* r1 */ + err |= __copy_from_user(&scr->pt.r8, &sc->sc_gr[8], 4*8); /* r8-r11 */ + err |= __copy_from_user(&scr->pt.r12, &sc->sc_gr[12], 2*8); /* r12-r13 */ + err |= __copy_from_user(&scr->pt.r15, &sc->sc_gr[15], 8); /* r15 */ + + scr->pt.cr_ifs = cfm | (1UL << 63); + scr->pt.ar_rsc = rsc | (3 << 2); /* force PL3 */ + + /* establish new instruction pointer: */ + scr->pt.cr_iip = ip & ~0x3UL; + ia64_psr(&scr->pt)->ri = ip & 0x3; + scr->pt.cr_ipsr = (scr->pt.cr_ipsr & ~IA64_PSR_UM) | (um & IA64_PSR_UM); + + scr->scratch_unat = ia64_put_scratch_nat_bits(&scr->pt, nat); + + if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { + /* Restore most scratch-state only when not in syscall. */ + err |= __get_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __get_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ + err |= __get_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ + err |= __copy_from_user(&scr->pt.ar_csd, &sc->sc_ar25, 2*8); /* ar.csd & ar.ssd */ + err |= __copy_from_user(&scr->pt.r2, &sc->sc_gr[2], 2*8); /* r2-r3 */ + err |= __copy_from_user(&scr->pt.r16, &sc->sc_gr[16], 16*8); /* r16-r31 */ + } + + if ((flags & IA64_SC_FLAG_FPH_VALID) != 0) { + struct ia64_psr *psr = ia64_psr(&scr->pt); + + err |= __copy_from_user(current->thread.fph, &sc->sc_fr[32], 96*16); + psr->mfh = 0; /* drop signal handler's fph contents... */ + preempt_disable(); + if (psr->dfh) + ia64_drop_fpu(current); + else { + /* We already own the local fph, otherwise psr->dfh wouldn't be 0. */ + __ia64_load_fpu(current->thread.fph); + ia64_set_local_fpu_owner(current); + } + preempt_enable(); + } + return err; +} + +long +ia64_rt_sigreturn (struct sigscratch *scr) +{ + extern char ia64_strace_leave_kernel, ia64_leave_kernel; + struct sigcontext __user *sc; + sigset_t set; + long retval; + + sc = &((struct sigframe __user *) (scr->pt.r12 + 16))->sc; + + /* + * When we return to the previously executing context, r8 and r10 have already + * been setup the way we want them. Indeed, if the signal wasn't delivered while + * in a system call, we must not touch r8 or r10 as otherwise user-level state + * could be corrupted. + */ + retval = (long) &ia64_leave_kernel; + if (test_thread_flag(TIF_SYSCALL_TRACE) + || test_thread_flag(TIF_SYSCALL_AUDIT)) + /* + * strace expects to be notified after sigreturn returns even though the + * context to which we return may not be in the middle of a syscall. + * Thus, the return-value that strace displays for sigreturn is + * meaningless. + */ + retval = (long) &ia64_strace_leave_kernel; + + if (!access_ok(sc, sizeof(*sc))) + goto give_sigsegv; + + if (GET_SIGSET(&set, &sc->sc_mask)) + goto give_sigsegv; + + set_current_blocked(&set); + + if (restore_sigcontext(sc, scr)) + goto give_sigsegv; + +#if DEBUG_SIG + printk("SIG return (%s:%d): sp=%lx ip=%lx\n", + current->comm, current->pid, scr->pt.r12, scr->pt.cr_iip); +#endif + if (restore_altstack(&sc->sc_stack)) + goto give_sigsegv; + return retval; + + give_sigsegv: + force_sig(SIGSEGV); + return retval; +} + +/* + * This does just the minimum required setup of sigcontext. + * Specifically, it only installs data that is either not knowable at + * the user-level or that gets modified before execution in the + * trampoline starts. Everything else is done at the user-level. + */ +static long +setup_sigcontext (struct sigcontext __user *sc, sigset_t *mask, struct sigscratch *scr) +{ + unsigned long flags = 0, ifs, cfm, nat; + long err = 0; + + ifs = scr->pt.cr_ifs; + + if (on_sig_stack((unsigned long) sc)) + flags |= IA64_SC_FLAG_ONSTACK; + if ((ifs & (1UL << 63)) == 0) + /* if cr_ifs doesn't have the valid bit set, we got here through a syscall */ + flags |= IA64_SC_FLAG_IN_SYSCALL; + cfm = ifs & ((1UL << 38) - 1); + ia64_flush_fph(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID)) { + flags |= IA64_SC_FLAG_FPH_VALID; + err = __copy_to_user(&sc->sc_fr[32], current->thread.fph, 96*16); + } + + nat = ia64_get_scratch_nat_bits(&scr->pt, scr->scratch_unat); + + err |= __put_user(flags, &sc->sc_flags); + err |= __put_user(nat, &sc->sc_nat); + err |= PUT_SIGSET(mask, &sc->sc_mask); + err |= __put_user(cfm, &sc->sc_cfm); + err |= __put_user(scr->pt.cr_ipsr & IA64_PSR_UM, &sc->sc_um); + err |= __put_user(scr->pt.ar_rsc, &sc->sc_ar_rsc); + err |= __put_user(scr->pt.ar_unat, &sc->sc_ar_unat); /* ar.unat */ + err |= __put_user(scr->pt.ar_fpsr, &sc->sc_ar_fpsr); /* ar.fpsr */ + err |= __put_user(scr->pt.ar_pfs, &sc->sc_ar_pfs); + err |= __put_user(scr->pt.pr, &sc->sc_pr); /* predicates */ + err |= __put_user(scr->pt.b0, &sc->sc_br[0]); /* b0 (rp) */ + err |= __put_user(scr->pt.b6, &sc->sc_br[6]); /* b6 */ + err |= __copy_to_user(&sc->sc_gr[1], &scr->pt.r1, 8); /* r1 */ + err |= __copy_to_user(&sc->sc_gr[8], &scr->pt.r8, 4*8); /* r8-r11 */ + err |= __copy_to_user(&sc->sc_gr[12], &scr->pt.r12, 2*8); /* r12-r13 */ + err |= __copy_to_user(&sc->sc_gr[15], &scr->pt.r15, 8); /* r15 */ + err |= __put_user(scr->pt.cr_iip + ia64_psr(&scr->pt)->ri, &sc->sc_ip); + + if (!(flags & IA64_SC_FLAG_IN_SYSCALL)) { + /* Copy scratch regs to sigcontext if the signal didn't interrupt a syscall. */ + err |= __put_user(scr->pt.ar_ccv, &sc->sc_ar_ccv); /* ar.ccv */ + err |= __put_user(scr->pt.b7, &sc->sc_br[7]); /* b7 */ + err |= __put_user(scr->pt.r14, &sc->sc_gr[14]); /* r14 */ + err |= __copy_to_user(&sc->sc_ar25, &scr->pt.ar_csd, 2*8); /* ar.csd & ar.ssd */ + err |= __copy_to_user(&sc->sc_gr[2], &scr->pt.r2, 2*8); /* r2-r3 */ + err |= __copy_to_user(&sc->sc_gr[16], &scr->pt.r16, 16*8); /* r16-r31 */ + } + return err; +} + +/* + * Check whether the register-backing store is already on the signal stack. + */ +static inline int +rbs_on_sig_stack (unsigned long bsp) +{ + return (bsp - current->sas_ss_sp < current->sas_ss_size); +} + +static long +setup_frame(struct ksignal *ksig, sigset_t *set, struct sigscratch *scr) +{ + extern char __kernel_sigtramp[]; + unsigned long tramp_addr, new_rbs = 0, new_sp; + struct sigframe __user *frame; + long err; + + new_sp = scr->pt.r12; + tramp_addr = (unsigned long) __kernel_sigtramp; + if (ksig->ka.sa.sa_flags & SA_ONSTACK) { + int onstack = sas_ss_flags(new_sp); + + if (onstack == 0) { + new_sp = current->sas_ss_sp + current->sas_ss_size; + /* + * We need to check for the register stack being on the + * signal stack separately, because it's switched + * separately (memory stack is switched in the kernel, + * register stack is switched in the signal trampoline). + */ + if (!rbs_on_sig_stack(scr->pt.ar_bspstore)) + new_rbs = ALIGN(current->sas_ss_sp, + sizeof(long)); + } else if (onstack == SS_ONSTACK) { + unsigned long check_sp; + + /* + * If we are on the alternate signal stack and would + * overflow it, don't. Return an always-bogus address + * instead so we will die with SIGSEGV. + */ + check_sp = (new_sp - sizeof(*frame)) & -STACK_ALIGN; + if (!likely(on_sig_stack(check_sp))) { + force_sigsegv(ksig->sig); + return 1; + } + } + } + frame = (void __user *) ((new_sp - sizeof(*frame)) & -STACK_ALIGN); + + if (!access_ok(frame, sizeof(*frame))) { + force_sigsegv(ksig->sig); + return 1; + } + + err = __put_user(ksig->sig, &frame->arg0); + err |= __put_user(&frame->info, &frame->arg1); + err |= __put_user(&frame->sc, &frame->arg2); + err |= __put_user(new_rbs, &frame->sc.sc_rbs_base); + err |= __put_user(0, &frame->sc.sc_loadrs); /* initialize to zero */ + err |= __put_user(ksig->ka.sa.sa_handler, &frame->handler); + + err |= copy_siginfo_to_user(&frame->info, &ksig->info); + + err |= __save_altstack(&frame->sc.sc_stack, scr->pt.r12); + err |= setup_sigcontext(&frame->sc, set, scr); + + if (unlikely(err)) { + force_sigsegv(ksig->sig); + return 1; + } + + scr->pt.r12 = (unsigned long) frame - 16; /* new stack pointer */ + scr->pt.ar_fpsr = FPSR_DEFAULT; /* reset fpsr for signal handler */ + scr->pt.cr_iip = tramp_addr; + ia64_psr(&scr->pt)->ri = 0; /* start executing in first slot */ + ia64_psr(&scr->pt)->be = 0; /* force little-endian byte-order */ + /* + * Force the interruption function mask to zero. This has no effect when a + * system-call got interrupted by a signal (since, in that case, scr->pt_cr_ifs is + * ignored), but it has the desirable effect of making it possible to deliver a + * signal with an incomplete register frame (which happens when a mandatory RSE + * load faults). Furthermore, it has no negative effect on the getting the user's + * dirty partition preserved, because that's governed by scr->pt.loadrs. + */ + scr->pt.cr_ifs = (1UL << 63); + + /* + * Note: this affects only the NaT bits of the scratch regs (the ones saved in + * pt_regs), which is exactly what we want. + */ + scr->scratch_unat = 0; /* ensure NaT bits of r12 is clear */ + +#if DEBUG_SIG + printk("SIG deliver (%s:%d): sig=%d sp=%lx ip=%lx handler=%p\n", + current->comm, current->pid, ksig->sig, scr->pt.r12, frame->sc.sc_ip, frame->handler); +#endif + return 0; +} + +static long +handle_signal (struct ksignal *ksig, struct sigscratch *scr) +{ + int ret = setup_frame(ksig, sigmask_to_save(), scr); + + if (!ret) + signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP)); + + return ret; +} + +/* + * Note that `init' is a special process: it doesn't get signals it doesn't want to + * handle. Thus you cannot kill init even with a SIGKILL even by mistake. + */ +void +ia64_do_signal (struct sigscratch *scr, long in_syscall) +{ + long restart = in_syscall; + long errno = scr->pt.r8; + struct ksignal ksig; + + /* + * This only loops in the rare cases of handle_signal() failing, in which case we + * need to push through a forced SIGSEGV. + */ + while (1) { + if (!get_signal(&ksig)) + break; + + /* + * get_signal() may have run a debugger (via notify_parent()) + * and the debugger may have modified the state (e.g., to arrange for an + * inferior call), thus it's important to check for restarting _after_ + * get_signal(). + */ + if ((long) scr->pt.r10 != -1) + /* + * A system calls has to be restarted only if one of the error codes + * ERESTARTNOHAND, ERESTARTSYS, or ERESTARTNOINTR is returned. If r10 + * isn't -1 then r8 doesn't hold an error code and we don't need to + * restart the syscall, so we can clear the "restart" flag here. + */ + restart = 0; + + if (ksig.sig <= 0) + break; + + if (unlikely(restart)) { + switch (errno) { + case ERESTART_RESTARTBLOCK: + case ERESTARTNOHAND: + scr->pt.r8 = EINTR; + /* note: scr->pt.r10 is already -1 */ + break; + case ERESTARTSYS: + if ((ksig.ka.sa.sa_flags & SA_RESTART) == 0) { + scr->pt.r8 = EINTR; + /* note: scr->pt.r10 is already -1 */ + break; + } + fallthrough; + case ERESTARTNOINTR: + ia64_decrement_ip(&scr->pt); + restart = 0; /* don't restart twice if handle_signal() fails... */ + } + } + + /* + * Whee! Actually deliver the signal. If the delivery failed, we need to + * continue to iterate in this loop so we can deliver the SIGSEGV... + */ + if (handle_signal(&ksig, scr)) + return; + } + + /* Did we come from a system call? */ + if (restart) { + /* Restart the system call - no handlers present */ + if (errno == ERESTARTNOHAND || errno == ERESTARTSYS || errno == ERESTARTNOINTR + || errno == ERESTART_RESTARTBLOCK) + { + /* + * Note: the syscall number is in r15 which is saved in + * pt_regs so all we need to do here is adjust ip so that + * the "break" instruction gets re-executed. + */ + ia64_decrement_ip(&scr->pt); + if (errno == ERESTART_RESTARTBLOCK) + scr->pt.r15 = __NR_restart_syscall; + } + } + + /* if there's no signal to deliver, we just put the saved sigmask + * back */ + restore_saved_sigmask(); +} diff --git a/arch/ia64/kernel/smp.c b/arch/ia64/kernel/smp.c new file mode 100644 index 000000000000..ea4f009a232b --- /dev/null +++ b/arch/ia64/kernel/smp.c @@ -0,0 +1,335 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SMP Support + * + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 1999, 2001, 2003 David Mosberger-Tang + * + * Lots of stuff stolen from arch/alpha/kernel/smp.c + * + * 01/05/16 Rohit Seth IA64-SMP functions. Reorganized + * the existing code (on the lines of x86 port). + * 00/09/11 David Mosberger Do loops_per_jiffy + * calibration on each CPU. + * 00/08/23 Asit Mallick fixed logical processor id + * 00/03/31 Rohit Seth Fixes for Bootstrap Processor + * & cpu_online_map now gets done here (instead of setup.c) + * 99/10/05 davidm Update to bring it in sync with new command-line processing + * scheme. + * 10/13/00 Goutham Rao Updated smp_call_function and + * smp_call_function_single to resend IPI on timeouts + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Note: alignment of 4 entries/cacheline was empirically determined + * to be a good tradeoff between hot cachelines & spreading the array + * across too many cacheline. + */ +static struct local_tlb_flush_counts { + unsigned int count; +} __attribute__((__aligned__(32))) local_tlb_flush_counts[NR_CPUS]; + +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned short [NR_CPUS], + shadow_flush_counts); + +#define IPI_CALL_FUNC 0 +#define IPI_CPU_STOP 1 +#define IPI_CALL_FUNC_SINGLE 2 +#define IPI_KDUMP_CPU_STOP 3 + +/* This needs to be cacheline aligned because it is written to by *other* CPUs. */ +static DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, ipi_operation); + +extern void cpu_halt (void); + +static void +stop_this_cpu(void) +{ + /* + * Remove this CPU: + */ + set_cpu_online(smp_processor_id(), false); + max_xtp(); + local_irq_disable(); + cpu_halt(); +} + +void +cpu_die(void) +{ + max_xtp(); + local_irq_disable(); + cpu_halt(); + /* Should never be here */ + BUG(); + for (;;); +} + +irqreturn_t +handle_IPI (int irq, void *dev_id) +{ + int this_cpu = get_cpu(); + unsigned long *pending_ipis = &__ia64_per_cpu_var(ipi_operation); + unsigned long ops; + + mb(); /* Order interrupt and bit testing. */ + while ((ops = xchg(pending_ipis, 0)) != 0) { + mb(); /* Order bit clearing and data access. */ + do { + unsigned long which; + + which = ffz(~ops); + ops &= ~(1 << which); + + switch (which) { + case IPI_CPU_STOP: + stop_this_cpu(); + break; + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; + case IPI_CALL_FUNC_SINGLE: + generic_smp_call_function_single_interrupt(); + break; +#ifdef CONFIG_KEXEC + case IPI_KDUMP_CPU_STOP: + unw_init_running(kdump_cpu_freeze, NULL); + break; +#endif + default: + printk(KERN_CRIT "Unknown IPI on CPU %d: %lu\n", + this_cpu, which); + break; + } + } while (ops); + mb(); /* Order data access and bit testing. */ + } + put_cpu(); + return IRQ_HANDLED; +} + + + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_single (int dest_cpu, int op) +{ + set_bit(op, &per_cpu(ipi_operation, dest_cpu)); + ia64_send_ipi(dest_cpu, IA64_IPI_VECTOR, IA64_IPI_DM_INT, 0); +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_allbutself (int op) +{ + unsigned int i; + + for_each_online_cpu(i) { + if (i != smp_processor_id()) + send_IPI_single(i, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_mask(const struct cpumask *mask, int op) +{ + unsigned int cpu; + + for_each_cpu(cpu, mask) { + send_IPI_single(cpu, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_all (int op) +{ + int i; + + for_each_online_cpu(i) { + send_IPI_single(i, op); + } +} + +/* + * Called with preemption disabled. + */ +static inline void +send_IPI_self (int op) +{ + send_IPI_single(smp_processor_id(), op); +} + +#ifdef CONFIG_KEXEC +void +kdump_smp_send_stop(void) +{ + send_IPI_allbutself(IPI_KDUMP_CPU_STOP); +} + +void +kdump_smp_send_init(void) +{ + unsigned int cpu, self_cpu; + self_cpu = smp_processor_id(); + for_each_online_cpu(cpu) { + if (cpu != self_cpu) { + if(kdump_status[cpu] == 0) + ia64_send_ipi(cpu, 0, IA64_IPI_DM_INIT, 0); + } + } +} +#endif +/* + * Called with preemption disabled. + */ +void +arch_smp_send_reschedule (int cpu) +{ + ia64_send_ipi(cpu, IA64_IPI_RESCHEDULE, IA64_IPI_DM_INT, 0); +} +EXPORT_SYMBOL_GPL(arch_smp_send_reschedule); + +/* + * Called with preemption disabled. + */ +static void +smp_send_local_flush_tlb (int cpu) +{ + ia64_send_ipi(cpu, IA64_IPI_LOCAL_TLB_FLUSH, IA64_IPI_DM_INT, 0); +} + +void +smp_local_flush_tlb(void) +{ + /* + * Use atomic ops. Otherwise, the load/increment/store sequence from + * a "++" operation can have the line stolen between the load & store. + * The overhead of the atomic op in negligible in this case & offers + * significant benefit for the brief periods where lots of cpus + * are simultaneously flushing TLBs. + */ + ia64_fetchadd(1, &local_tlb_flush_counts[smp_processor_id()].count, acq); + local_flush_tlb_all(); +} + +#define FLUSH_DELAY 5 /* Usec backoff to eliminate excessive cacheline bouncing */ + +void +smp_flush_tlb_cpumask(cpumask_t xcpumask) +{ + unsigned short *counts = __ia64_per_cpu_var(shadow_flush_counts); + cpumask_t cpumask = xcpumask; + int mycpu, cpu, flush_mycpu = 0; + + preempt_disable(); + mycpu = smp_processor_id(); + + for_each_cpu(cpu, &cpumask) + counts[cpu] = local_tlb_flush_counts[cpu].count & 0xffff; + + mb(); + for_each_cpu(cpu, &cpumask) { + if (cpu == mycpu) + flush_mycpu = 1; + else + smp_send_local_flush_tlb(cpu); + } + + if (flush_mycpu) + smp_local_flush_tlb(); + + for_each_cpu(cpu, &cpumask) + while(counts[cpu] == (local_tlb_flush_counts[cpu].count & 0xffff)) + udelay(FLUSH_DELAY); + + preempt_enable(); +} + +void +smp_flush_tlb_all (void) +{ + on_each_cpu((void (*)(void *))local_flush_tlb_all, NULL, 1); +} + +void +smp_flush_tlb_mm (struct mm_struct *mm) +{ + cpumask_var_t cpus; + preempt_disable(); + /* this happens for the common case of a single-threaded fork(): */ + if (likely(mm == current->active_mm && atomic_read(&mm->mm_users) == 1)) + { + local_finish_flush_tlb_mm(mm); + preempt_enable(); + return; + } + if (!alloc_cpumask_var(&cpus, GFP_ATOMIC)) { + smp_call_function((void (*)(void *))local_finish_flush_tlb_mm, + mm, 1); + } else { + cpumask_copy(cpus, mm_cpumask(mm)); + smp_call_function_many(cpus, + (void (*)(void *))local_finish_flush_tlb_mm, mm, 1); + free_cpumask_var(cpus); + } + local_irq_disable(); + local_finish_flush_tlb_mm(mm); + local_irq_enable(); + preempt_enable(); +} + +void arch_send_call_function_single_ipi(int cpu) +{ + send_IPI_single(cpu, IPI_CALL_FUNC_SINGLE); +} + +void arch_send_call_function_ipi_mask(const struct cpumask *mask) +{ + send_IPI_mask(mask, IPI_CALL_FUNC); +} + +/* + * this function calls the 'stop' function on all other CPUs in the system. + */ +void +smp_send_stop (void) +{ + send_IPI_allbutself(IPI_CPU_STOP); +} diff --git a/arch/ia64/kernel/smpboot.c b/arch/ia64/kernel/smpboot.c new file mode 100644 index 000000000000..d0e935cf2093 --- /dev/null +++ b/arch/ia64/kernel/smpboot.c @@ -0,0 +1,839 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * SMP boot-related support + * + * Copyright (C) 1998-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2001, 2004-2005 Intel Corp + * Rohit Seth + * Suresh Siddha + * Gordon Jin + * Ashok Raj + * + * 01/05/16 Rohit Seth Moved SMP booting functions from smp.c to here. + * 01/04/27 David Mosberger Added ITC synching code. + * 02/07/31 David Mosberger Switch over to hotplug-CPU boot-sequence. + * smp_boot_cpus()/smp_commence() is replaced by + * smp_prepare_cpus()/__cpu_up()/smp_cpus_done(). + * 04/06/21 Ashok Raj Added CPU Hotplug Support + * 04/12/26 Jin Gordon + * 04/12/26 Rohit Seth + * Add multi-threading and multi-core detection + * 05/01/30 Suresh Siddha + * Setup cpu_sibling_map and cpu_core_map + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SMP_DEBUG 0 + +#if SMP_DEBUG +#define Dprintk(x...) printk(x) +#else +#define Dprintk(x...) +#endif + +#ifdef CONFIG_HOTPLUG_CPU +#ifdef CONFIG_PERMIT_BSP_REMOVE +#define bsp_remove_ok 1 +#else +#define bsp_remove_ok 0 +#endif + +/* + * Global array allocated for NR_CPUS at boot time + */ +struct sal_to_os_boot sal_boot_rendez_state[NR_CPUS]; + +/* + * start_ap in head.S uses this to store current booting cpu + * info. + */ +struct sal_to_os_boot *sal_state_for_booting_cpu = &sal_boot_rendez_state[0]; + +#define set_brendez_area(x) (sal_state_for_booting_cpu = &sal_boot_rendez_state[(x)]); + +#else +#define set_brendez_area(x) +#endif + + +/* + * ITC synchronization related stuff: + */ +#define MASTER (0) +#define SLAVE (SMP_CACHE_BYTES/8) + +#define NUM_ROUNDS 64 /* magic value */ +#define NUM_ITERS 5 /* likewise */ + +static DEFINE_SPINLOCK(itc_sync_lock); +static volatile unsigned long go[SLAVE + 1]; + +#define DEBUG_ITC_SYNC 0 + +extern void start_ap (void); +extern unsigned long ia64_iobase; + +struct task_struct *task_for_booting_cpu; + +/* + * State for each CPU + */ +DEFINE_PER_CPU(int, cpu_state); + +cpumask_t cpu_core_map[NR_CPUS] __cacheline_aligned; +EXPORT_SYMBOL(cpu_core_map); +DEFINE_PER_CPU_SHARED_ALIGNED(cpumask_t, cpu_sibling_map); +EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); + +int smp_num_siblings = 1; + +/* which logical CPU number maps to which CPU (physical APIC ID) */ +volatile int ia64_cpu_to_sapicid[NR_CPUS]; +EXPORT_SYMBOL(ia64_cpu_to_sapicid); + +static cpumask_t cpu_callin_map; + +struct smp_boot_data smp_boot_data __initdata; + +unsigned long ap_wakeup_vector = -1; /* External Int use to wakeup APs */ + +char __initdata no_int_routing; + +unsigned char smp_int_redirect; /* are INT and IPI redirectable by the chipset? */ + +#ifdef CONFIG_FORCE_CPEI_RETARGET +#define CPEI_OVERRIDE_DEFAULT (1) +#else +#define CPEI_OVERRIDE_DEFAULT (0) +#endif + +unsigned int force_cpei_retarget = CPEI_OVERRIDE_DEFAULT; + +static int __init +cmdl_force_cpei(char *str) +{ + int value=0; + + get_option (&str, &value); + force_cpei_retarget = value; + + return 1; +} + +__setup("force_cpei=", cmdl_force_cpei); + +static int __init +nointroute (char *str) +{ + no_int_routing = 1; + printk ("no_int_routing on\n"); + return 1; +} + +__setup("nointroute", nointroute); + +static void fix_b0_for_bsp(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + int cpuid; + static int fix_bsp_b0 = 1; + + cpuid = smp_processor_id(); + + /* + * Cache the b0 value on the first AP that comes up + */ + if (!(fix_bsp_b0 && cpuid)) + return; + + sal_boot_rendez_state[0].br[0] = sal_boot_rendez_state[cpuid].br[0]; + printk ("Fixed BSP b0 value from CPU %d\n", cpuid); + + fix_bsp_b0 = 0; +#endif +} + +void +sync_master (void *arg) +{ + unsigned long flags, i; + + go[MASTER] = 0; + + local_irq_save(flags); + { + for (i = 0; i < NUM_ROUNDS*NUM_ITERS; ++i) { + while (!go[MASTER]) + cpu_relax(); + go[MASTER] = 0; + go[SLAVE] = ia64_get_itc(); + } + } + local_irq_restore(flags); +} + +/* + * Return the number of cycles by which our itc differs from the itc on the master + * (time-keeper) CPU. A positive number indicates our itc is ahead of the master, + * negative that it is behind. + */ +static inline long +get_delta (long *rt, long *master) +{ + unsigned long best_t0 = 0, best_t1 = ~0UL, best_tm = 0; + unsigned long tcenter, t0, t1, tm; + long i; + + for (i = 0; i < NUM_ITERS; ++i) { + t0 = ia64_get_itc(); + go[MASTER] = 1; + while (!(tm = go[SLAVE])) + cpu_relax(); + go[SLAVE] = 0; + t1 = ia64_get_itc(); + + if (t1 - t0 < best_t1 - best_t0) + best_t0 = t0, best_t1 = t1, best_tm = tm; + } + + *rt = best_t1 - best_t0; + *master = best_tm - best_t0; + + /* average best_t0 and best_t1 without overflow: */ + tcenter = (best_t0/2 + best_t1/2); + if (best_t0 % 2 + best_t1 % 2 == 2) + ++tcenter; + return tcenter - best_tm; +} + +/* + * Synchronize ar.itc of the current (slave) CPU with the ar.itc of the MASTER CPU + * (normally the time-keeper CPU). We use a closed loop to eliminate the possibility of + * unaccounted-for errors (such as getting a machine check in the middle of a calibration + * step). The basic idea is for the slave to ask the master what itc value it has and to + * read its own itc before and after the master responds. Each iteration gives us three + * timestamps: + * + * slave master + * + * t0 ---\ + * ---\ + * ---> + * tm + * /--- + * /--- + * t1 <--- + * + * + * The goal is to adjust the slave's ar.itc such that tm falls exactly half-way between t0 + * and t1. If we achieve this, the clocks are synchronized provided the interconnect + * between the slave and the master is symmetric. Even if the interconnect were + * asymmetric, we would still know that the synchronization error is smaller than the + * roundtrip latency (t0 - t1). + * + * When the interconnect is quiet and symmetric, this lets us synchronize the itc to + * within one or two cycles. However, we can only *guarantee* that the synchronization is + * accurate to within a round-trip time, which is typically in the range of several + * hundred cycles (e.g., ~500 cycles). In practice, this means that the itc's are usually + * almost perfectly synchronized, but we shouldn't assume that the accuracy is much better + * than half a micro second or so. + */ +void +ia64_sync_itc (unsigned int master) +{ + long i, delta, adj, adjust_latency = 0, done = 0; + unsigned long flags, rt, master_time_stamp, bound; +#if DEBUG_ITC_SYNC + struct { + long rt; /* roundtrip time */ + long master; /* master's timestamp */ + long diff; /* difference between midpoint and master's timestamp */ + long lat; /* estimate of itc adjustment latency */ + } t[NUM_ROUNDS]; +#endif + + /* + * Make sure local timer ticks are disabled while we sync. If + * they were enabled, we'd have to worry about nasty issues + * like setting the ITC ahead of (or a long time before) the + * next scheduled tick. + */ + BUG_ON((ia64_get_itv() & (1 << 16)) == 0); + + go[MASTER] = 1; + + if (smp_call_function_single(master, sync_master, NULL, 0) < 0) { + printk(KERN_ERR "sync_itc: failed to get attention of CPU %u!\n", master); + return; + } + + while (go[MASTER]) + cpu_relax(); /* wait for master to be ready */ + + spin_lock_irqsave(&itc_sync_lock, flags); + { + for (i = 0; i < NUM_ROUNDS; ++i) { + delta = get_delta(&rt, &master_time_stamp); + if (delta == 0) { + done = 1; /* let's lock on to this... */ + bound = rt; + } + + if (!done) { + if (i > 0) { + adjust_latency += -delta; + adj = -delta + adjust_latency/4; + } else + adj = -delta; + + ia64_set_itc(ia64_get_itc() + adj); + } +#if DEBUG_ITC_SYNC + t[i].rt = rt; + t[i].master = master_time_stamp; + t[i].diff = delta; + t[i].lat = adjust_latency/4; +#endif + } + } + spin_unlock_irqrestore(&itc_sync_lock, flags); + +#if DEBUG_ITC_SYNC + for (i = 0; i < NUM_ROUNDS; ++i) + printk("rt=%5ld master=%5ld diff=%5ld adjlat=%5ld\n", + t[i].rt, t[i].master, t[i].diff, t[i].lat); +#endif + + printk(KERN_INFO "CPU %d: synchronized ITC with CPU %u (last diff %ld cycles, " + "maxerr %lu cycles)\n", smp_processor_id(), master, delta, rt); +} + +/* + * Ideally sets up per-cpu profiling hooks. Doesn't do much now... + */ +static inline void smp_setup_percpu_timer(void) +{ +} + +static void +smp_callin (void) +{ + int cpuid, phys_id, itc_master; + struct cpuinfo_ia64 *last_cpuinfo, *this_cpuinfo; + extern void ia64_init_itm(void); + extern volatile int time_keeper_id; + + cpuid = smp_processor_id(); + phys_id = hard_smp_processor_id(); + itc_master = time_keeper_id; + + if (cpu_online(cpuid)) { + printk(KERN_ERR "huh, phys CPU#0x%x, CPU#0x%x already present??\n", + phys_id, cpuid); + BUG(); + } + + fix_b0_for_bsp(); + + /* + * numa_node_id() works after this. + */ + set_numa_node(cpu_to_node_map[cpuid]); + set_numa_mem(local_memory_node(cpu_to_node_map[cpuid])); + + spin_lock(&vector_lock); + /* Setup the per cpu irq handling data structures */ + __setup_vector_irq(cpuid); + notify_cpu_starting(cpuid); + set_cpu_online(cpuid, true); + per_cpu(cpu_state, cpuid) = CPU_ONLINE; + spin_unlock(&vector_lock); + + smp_setup_percpu_timer(); + + ia64_mca_cmc_vector_setup(); /* Setup vector on AP */ + + local_irq_enable(); + + if (!(sal_platform_features & IA64_SAL_PLATFORM_FEATURE_ITC_DRIFT)) { + /* + * Synchronize the ITC with the BP. Need to do this after irqs are + * enabled because ia64_sync_itc() calls smp_call_function_single(), which + * calls spin_unlock_bh(), which calls spin_unlock_bh(), which calls + * local_bh_enable(), which bugs out if irqs are not enabled... + */ + Dprintk("Going to syncup ITC with ITC Master.\n"); + ia64_sync_itc(itc_master); + } + + /* + * Get our bogomips. + */ + ia64_init_itm(); + + /* + * Delay calibration can be skipped if new processor is identical to the + * previous processor. + */ + last_cpuinfo = cpu_data(cpuid - 1); + this_cpuinfo = local_cpu_data; + if (last_cpuinfo->itc_freq != this_cpuinfo->itc_freq || + last_cpuinfo->proc_freq != this_cpuinfo->proc_freq || + last_cpuinfo->features != this_cpuinfo->features || + last_cpuinfo->revision != this_cpuinfo->revision || + last_cpuinfo->family != this_cpuinfo->family || + last_cpuinfo->archrev != this_cpuinfo->archrev || + last_cpuinfo->model != this_cpuinfo->model) + calibrate_delay(); + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + + /* + * Allow the master to continue. + */ + cpumask_set_cpu(cpuid, &cpu_callin_map); + Dprintk("Stack on CPU %d at about %p\n",cpuid, &cpuid); +} + + +/* + * Activate a secondary processor. head.S calls this. + */ +int +start_secondary (void *unused) +{ + /* Early console may use I/O ports */ + ia64_set_kr(IA64_KR_IO_BASE, __pa(ia64_iobase)); +#ifndef CONFIG_PRINTK_TIME + Dprintk("start_secondary: starting CPU 0x%x\n", hard_smp_processor_id()); +#endif + efi_map_pal_code(); + cpu_init(); + smp_callin(); + + cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); + return 0; +} + +static int +do_boot_cpu (int sapicid, int cpu, struct task_struct *idle) +{ + int timeout; + + task_for_booting_cpu = idle; + Dprintk("Sending wakeup vector %lu to AP 0x%x/0x%x.\n", ap_wakeup_vector, cpu, sapicid); + + set_brendez_area(cpu); + ia64_send_ipi(cpu, ap_wakeup_vector, IA64_IPI_DM_INT, 0); + + /* + * Wait 10s total for the AP to start + */ + Dprintk("Waiting on callin_map ..."); + for (timeout = 0; timeout < 100000; timeout++) { + if (cpumask_test_cpu(cpu, &cpu_callin_map)) + break; /* It has booted */ + barrier(); /* Make sure we re-read cpu_callin_map */ + udelay(100); + } + Dprintk("\n"); + + if (!cpumask_test_cpu(cpu, &cpu_callin_map)) { + printk(KERN_ERR "Processor 0x%x/0x%x is stuck.\n", cpu, sapicid); + ia64_cpu_to_sapicid[cpu] = -1; + set_cpu_online(cpu, false); /* was set in smp_callin() */ + return -EINVAL; + } + return 0; +} + +static int __init +decay (char *str) +{ + int ticks; + get_option (&str, &ticks); + return 1; +} + +__setup("decay=", decay); + +/* + * Initialize the logical CPU number to SAPICID mapping + */ +void __init +smp_build_cpu_map (void) +{ + int sapicid, cpu, i; + int boot_cpu_id = hard_smp_processor_id(); + + for (cpu = 0; cpu < NR_CPUS; cpu++) { + ia64_cpu_to_sapicid[cpu] = -1; + } + + ia64_cpu_to_sapicid[0] = boot_cpu_id; + init_cpu_present(cpumask_of(0)); + set_cpu_possible(0, true); + for (cpu = 1, i = 0; i < smp_boot_data.cpu_count; i++) { + sapicid = smp_boot_data.cpu_phys_id[i]; + if (sapicid == boot_cpu_id) + continue; + set_cpu_present(cpu, true); + set_cpu_possible(cpu, true); + ia64_cpu_to_sapicid[cpu] = sapicid; + cpu++; + } +} + +/* + * Cycle through the APs sending Wakeup IPIs to boot each. + */ +void __init +smp_prepare_cpus (unsigned int max_cpus) +{ + int boot_cpu_id = hard_smp_processor_id(); + + /* + * Initialize the per-CPU profiling counter/multiplier + */ + + smp_setup_percpu_timer(); + + cpumask_set_cpu(0, &cpu_callin_map); + + local_cpu_data->loops_per_jiffy = loops_per_jiffy; + ia64_cpu_to_sapicid[0] = boot_cpu_id; + + printk(KERN_INFO "Boot processor id 0x%x/0x%x\n", 0, boot_cpu_id); + + current_thread_info()->cpu = 0; + + /* + * If SMP should be disabled, then really disable it! + */ + if (!max_cpus) { + printk(KERN_INFO "SMP mode deactivated.\n"); + init_cpu_online(cpumask_of(0)); + init_cpu_present(cpumask_of(0)); + init_cpu_possible(cpumask_of(0)); + return; + } +} + +void smp_prepare_boot_cpu(void) +{ + set_cpu_online(smp_processor_id(), true); + cpumask_set_cpu(smp_processor_id(), &cpu_callin_map); + set_numa_node(cpu_to_node_map[smp_processor_id()]); + per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; +} + +#ifdef CONFIG_HOTPLUG_CPU +static inline void +clear_cpu_sibling_map(int cpu) +{ + int i; + + for_each_cpu(i, &per_cpu(cpu_sibling_map, cpu)) + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, i)); + for_each_cpu(i, &cpu_core_map[cpu]) + cpumask_clear_cpu(cpu, &cpu_core_map[i]); + + per_cpu(cpu_sibling_map, cpu) = cpu_core_map[cpu] = CPU_MASK_NONE; +} + +static void +remove_siblinginfo(int cpu) +{ + if (cpu_data(cpu)->threads_per_core == 1 && + cpu_data(cpu)->cores_per_socket == 1) { + cpumask_clear_cpu(cpu, &cpu_core_map[cpu]); + cpumask_clear_cpu(cpu, &per_cpu(cpu_sibling_map, cpu)); + return; + } + + /* remove it from all sibling map's */ + clear_cpu_sibling_map(cpu); +} + +extern void fixup_irqs(void); + +int migrate_platform_irqs(unsigned int cpu) +{ + int new_cpei_cpu; + struct irq_data *data = NULL; + const struct cpumask *mask; + int retval = 0; + + /* + * dont permit CPEI target to removed. + */ + if (cpe_vector > 0 && is_cpu_cpei_target(cpu)) { + printk ("CPU (%d) is CPEI Target\n", cpu); + if (can_cpei_retarget()) { + /* + * Now re-target the CPEI to a different processor + */ + new_cpei_cpu = cpumask_any(cpu_online_mask); + mask = cpumask_of(new_cpei_cpu); + set_cpei_target_cpu(new_cpei_cpu); + data = irq_get_irq_data(ia64_cpe_irq); + /* + * Switch for now, immediately, we need to do fake intr + * as other interrupts, but need to study CPEI behaviour with + * polling before making changes. + */ + if (data && data->chip) { + data->chip->irq_disable(data); + data->chip->irq_set_affinity(data, mask, false); + data->chip->irq_enable(data); + printk ("Re-targeting CPEI to cpu %d\n", new_cpei_cpu); + } + } + if (!data) { + printk ("Unable to retarget CPEI, offline cpu [%d] failed\n", cpu); + retval = -EBUSY; + } + } + return retval; +} + +/* must be called with cpucontrol mutex held */ +int __cpu_disable(void) +{ + int cpu = smp_processor_id(); + + /* + * dont permit boot processor for now + */ + if (cpu == 0 && !bsp_remove_ok) { + printk ("Your platform does not support removal of BSP\n"); + return (-EBUSY); + } + + set_cpu_online(cpu, false); + + if (migrate_platform_irqs(cpu)) { + set_cpu_online(cpu, true); + return -EBUSY; + } + + remove_siblinginfo(cpu); + fixup_irqs(); + local_flush_tlb_all(); + cpumask_clear_cpu(cpu, &cpu_callin_map); + return 0; +} + +void __cpu_die(unsigned int cpu) +{ + unsigned int i; + + for (i = 0; i < 100; i++) { + /* They ack this in play_dead by setting CPU_DEAD */ + if (per_cpu(cpu_state, cpu) == CPU_DEAD) + { + printk ("CPU %d is now offline\n", cpu); + return; + } + msleep(100); + } + printk(KERN_ERR "CPU %u didn't die...\n", cpu); +} +#endif /* CONFIG_HOTPLUG_CPU */ + +void +smp_cpus_done (unsigned int dummy) +{ + int cpu; + unsigned long bogosum = 0; + + /* + * Allow the user to impress friends. + */ + + for_each_online_cpu(cpu) { + bogosum += cpu_data(cpu)->loops_per_jiffy; + } + + printk(KERN_INFO "Total of %d processors activated (%lu.%02lu BogoMIPS).\n", + (int)num_online_cpus(), bogosum/(500000/HZ), (bogosum/(5000/HZ))%100); +} + +static inline void set_cpu_sibling_map(int cpu) +{ + int i; + + for_each_online_cpu(i) { + if ((cpu_data(cpu)->socket_id == cpu_data(i)->socket_id)) { + cpumask_set_cpu(i, &cpu_core_map[cpu]); + cpumask_set_cpu(cpu, &cpu_core_map[i]); + if (cpu_data(cpu)->core_id == cpu_data(i)->core_id) { + cpumask_set_cpu(i, + &per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, + &per_cpu(cpu_sibling_map, i)); + } + } + } +} + +int +__cpu_up(unsigned int cpu, struct task_struct *tidle) +{ + int ret; + int sapicid; + + sapicid = ia64_cpu_to_sapicid[cpu]; + if (sapicid == -1) + return -EINVAL; + + /* + * Already booted cpu? not valid anymore since we dont + * do idle loop tightspin anymore. + */ + if (cpumask_test_cpu(cpu, &cpu_callin_map)) + return -EINVAL; + + per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; + /* Processor goes to start_secondary(), sets online flag */ + ret = do_boot_cpu(sapicid, cpu, tidle); + if (ret < 0) + return ret; + + if (cpu_data(cpu)->threads_per_core == 1 && + cpu_data(cpu)->cores_per_socket == 1) { + cpumask_set_cpu(cpu, &per_cpu(cpu_sibling_map, cpu)); + cpumask_set_cpu(cpu, &cpu_core_map[cpu]); + return 0; + } + + set_cpu_sibling_map(cpu); + + return 0; +} + +/* + * Assume that CPUs have been discovered by some platform-dependent interface. For + * SoftSDV/Lion, that would be ACPI. + * + * Setup of the IPI irq handler is done in irq.c:init_IRQ_SMP(). + */ +void __init +init_smp_config(void) +{ + struct fptr { + unsigned long fp; + unsigned long gp; + } *ap_startup; + long sal_ret; + + /* Tell SAL where to drop the APs. */ + ap_startup = (struct fptr *) start_ap; + sal_ret = ia64_sal_set_vectors(SAL_VECTOR_OS_BOOT_RENDEZ, + ia64_tpa(ap_startup->fp), ia64_tpa(ap_startup->gp), 0, 0, 0, 0); + if (sal_ret < 0) + printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n", + ia64_sal_strerror(sal_ret)); +} + +/* + * identify_siblings(cpu) gets called from identify_cpu. This populates the + * information related to logical execution units in per_cpu_data structure. + */ +void identify_siblings(struct cpuinfo_ia64 *c) +{ + long status; + u16 pltid; + pal_logical_to_physical_t info; + + status = ia64_pal_logical_to_phys(-1, &info); + if (status != PAL_STATUS_SUCCESS) { + if (status != PAL_STATUS_UNIMPLEMENTED) { + printk(KERN_ERR + "ia64_pal_logical_to_phys failed with %ld\n", + status); + return; + } + + info.overview_ppid = 0; + info.overview_cpp = 1; + info.overview_tpc = 1; + } + + status = ia64_sal_physical_id_info(&pltid); + if (status != PAL_STATUS_SUCCESS) { + if (status != PAL_STATUS_UNIMPLEMENTED) + printk(KERN_ERR + "ia64_sal_pltid failed with %ld\n", + status); + return; + } + + c->socket_id = (pltid << 8) | info.overview_ppid; + + if (info.overview_cpp == 1 && info.overview_tpc == 1) + return; + + c->cores_per_socket = info.overview_cpp; + c->threads_per_core = info.overview_tpc; + c->num_log = info.overview_num_log; + + c->core_id = info.log1_cid; + c->thread_id = info.log1_tid; +} + +/* + * returns non zero, if multi-threading is enabled + * on at least one physical package. Due to hotplug cpu + * and (maxcpus=), all threads may not necessarily be enabled + * even though the processor supports multi-threading. + */ +int is_multithreading_enabled(void) +{ + int i, j; + + for_each_present_cpu(i) { + for_each_present_cpu(j) { + if (j == i) + continue; + if ((cpu_data(j)->socket_id == cpu_data(i)->socket_id)) { + if (cpu_data(j)->core_id == cpu_data(i)->core_id) + return 1; + } + } + } + return 0; +} +EXPORT_SYMBOL_GPL(is_multithreading_enabled); diff --git a/arch/ia64/kernel/stacktrace.c b/arch/ia64/kernel/stacktrace.c new file mode 100644 index 000000000000..6e583a6bd2f6 --- /dev/null +++ b/arch/ia64/kernel/stacktrace.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * arch/ia64/kernel/stacktrace.c + * + * Stack trace management functions + * + */ +#include +#include +#include + +static void +ia64_do_save_stack(struct unw_frame_info *info, void *arg) +{ + struct stack_trace *trace = arg; + unsigned long ip; + int skip = trace->skip; + + trace->nr_entries = 0; + do { + unw_get_ip(info, &ip); + if (ip == 0) + break; + if (skip == 0) { + trace->entries[trace->nr_entries++] = ip; + if (trace->nr_entries == trace->max_entries) + break; + } else + skip--; + } while (unw_unwind(info) >= 0); +} + +/* + * Save stack-backtrace addresses into a stack_trace buffer. + */ +void save_stack_trace(struct stack_trace *trace) +{ + unw_init_running(ia64_do_save_stack, trace); +} +EXPORT_SYMBOL(save_stack_trace); diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c new file mode 100644 index 000000000000..eb561cc93632 --- /dev/null +++ b/arch/ia64/kernel/sys_ia64.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This file contains various system calls that have different calling + * conventions on different platforms. + * + * Copyright (C) 1999-2000, 2002-2003, 2005 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include /* doh, must come after sched.h... */ +#include +#include +#include +#include + +#include +#include + +unsigned long +arch_get_unmapped_area (struct file *filp, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + long map_shared = (flags & MAP_SHARED); + unsigned long align_mask = 0; + struct mm_struct *mm = current->mm; + struct vm_unmapped_area_info info; + + if (len > RGN_MAP_LIMIT) + return -ENOMEM; + + /* handle fixed mapping: prevent overlap with huge pages */ + if (flags & MAP_FIXED) { + if (is_hugepage_only_range(mm, addr, len)) + return -EINVAL; + return addr; + } + +#ifdef CONFIG_HUGETLB_PAGE + if (REGION_NUMBER(addr) == RGN_HPAGE) + addr = 0; +#endif + if (!addr) + addr = TASK_UNMAPPED_BASE; + + if (map_shared && (TASK_SIZE > 0xfffffffful)) + /* + * For 64-bit tasks, align shared segments to 1MB to avoid potential + * performance penalty due to virtual aliasing (see ASDM). For 32-bit + * tasks, we prefer to avoid exhausting the address space too quickly by + * limiting alignment to a single page. + */ + align_mask = PAGE_MASK & (SHMLBA - 1); + + info.flags = 0; + info.length = len; + info.low_limit = addr; + info.high_limit = TASK_SIZE; + info.align_mask = align_mask; + info.align_offset = pgoff << PAGE_SHIFT; + return vm_unmapped_area(&info); +} + +asmlinkage long +ia64_getpriority (int which, int who) +{ + long prio; + + prio = sys_getpriority(which, who); + if (prio >= 0) { + force_successful_syscall_return(); + prio = 20 - prio; + } + return prio; +} + +/* XXX obsolete, but leave it here until the old libc is gone... */ +asmlinkage unsigned long +sys_getpagesize (void) +{ + return PAGE_SIZE; +} + +asmlinkage unsigned long +ia64_brk (unsigned long brk) +{ + unsigned long retval = sys_brk(brk); + force_successful_syscall_return(); + return retval; +} + +/* + * On IA-64, we return the two file descriptors in ret0 and ret1 (r8 + * and r9) as this is faster than doing a copy_to_user(). + */ +asmlinkage long +sys_ia64_pipe (void) +{ + struct pt_regs *regs = task_pt_regs(current); + int fd[2]; + int retval; + + retval = do_pipe_flags(fd, 0); + if (retval) + goto out; + retval = fd[0]; + regs->r9 = fd[1]; + out: + return retval; +} + +int ia64_mmap_check(unsigned long addr, unsigned long len, + unsigned long flags) +{ + unsigned long roff; + + /* + * Don't permit mappings into unmapped space, the virtual page table + * of a region, or across a region boundary. Note: RGN_MAP_LIMIT is + * equal to 2^n-PAGE_SIZE (for some integer n <= 61) and len > 0. + */ + roff = REGION_OFFSET(addr); + if ((len > RGN_MAP_LIMIT) || (roff > (RGN_MAP_LIMIT - len))) + return -EINVAL; + return 0; +} + +/* + * mmap2() is like mmap() except that the offset is expressed in units + * of PAGE_SIZE (instead of bytes). This allows to mmap2() (pieces + * of) files that are larger than the address space of the CPU. + */ +asmlinkage unsigned long +sys_mmap2 (unsigned long addr, unsigned long len, int prot, int flags, int fd, long pgoff) +{ + addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); + if (!IS_ERR_VALUE(addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage unsigned long +sys_mmap (unsigned long addr, unsigned long len, int prot, int flags, int fd, long off) +{ + if (offset_in_page(off) != 0) + return -EINVAL; + + addr = ksys_mmap_pgoff(addr, len, prot, flags, fd, off >> PAGE_SHIFT); + if (!IS_ERR_VALUE(addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage unsigned long +ia64_mremap (unsigned long addr, unsigned long old_len, unsigned long new_len, unsigned long flags, + unsigned long new_addr) +{ + addr = sys_mremap(addr, old_len, new_len, flags, new_addr); + if (!IS_ERR_VALUE(addr)) + force_successful_syscall_return(); + return addr; +} + +asmlinkage long +ia64_clock_getres(const clockid_t which_clock, struct __kernel_timespec __user *tp) +{ + struct timespec64 rtn_tp; + s64 tick_ns; + + /* + * ia64's clock_gettime() syscall is implemented as a vdso call + * fsys_clock_gettime(). Currently it handles only + * CLOCK_REALTIME and CLOCK_MONOTONIC. Both are based on + * 'ar.itc' counter which gets incremented at a constant + * frequency. It's usually 400MHz, ~2.5x times slower than CPU + * clock frequency. Which is almost a 1ns hrtimer, but not quite. + * + * Let's special-case these timers to report correct precision + * based on ITC frequency and not HZ frequency for supported + * clocks. + */ + switch (which_clock) { + case CLOCK_REALTIME: + case CLOCK_MONOTONIC: + tick_ns = DIV_ROUND_UP(NSEC_PER_SEC, local_cpu_data->itc_freq); + rtn_tp = ns_to_timespec64(tick_ns); + return put_timespec64(&rtn_tp, tp); + } + + return sys_clock_getres(which_clock, tp); +} diff --git a/arch/ia64/kernel/syscalls/Makefile b/arch/ia64/kernel/syscalls/Makefile new file mode 100644 index 000000000000..d009f927a048 --- /dev/null +++ b/arch/ia64/kernel/syscalls/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +kapi := arch/$(SRCARCH)/include/generated/asm +uapi := arch/$(SRCARCH)/include/generated/uapi/asm + +$(shell mkdir -p $(uapi) $(kapi)) + +syscall := $(src)/syscall.tbl +syshdr := $(srctree)/scripts/syscallhdr.sh +systbl := $(srctree)/scripts/syscalltbl.sh + +quiet_cmd_syshdr = SYSHDR $@ + cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --offset __NR_Linux $< $@ + +quiet_cmd_systbl = SYSTBL $@ + cmd_systbl = $(CONFIG_SHELL) $(systbl) $< $@ + +$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE + $(call if_changed,syshdr) + +$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE + $(call if_changed,systbl) + +uapisyshdr-y += unistd_64.h +kapisyshdr-y += syscall_table.h + +uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y)) +kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y)) +targets += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y)) + +PHONY += all +all: $(uapisyshdr-y) $(kapisyshdr-y) + @: diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl new file mode 100644 index 000000000000..6db5e2f51435 --- /dev/null +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -0,0 +1,379 @@ +# SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note +# +# Linux system call numbers and entry vectors for ia64 +# +# The format is: +# +# +# Add 1024 to will get the actual system call number +# +# The is always "common" for this file +# +0 common ni_syscall sys_ni_syscall +1 common exit sys_exit +2 common read sys_read +3 common write sys_write +4 common open sys_open +5 common close sys_close +6 common creat sys_creat +7 common link sys_link +8 common unlink sys_unlink +9 common execve ia64_execve +10 common chdir sys_chdir +11 common fchdir sys_fchdir +12 common utimes sys_utimes +13 common mknod sys_mknod +14 common chmod sys_chmod +15 common chown sys_chown +16 common lseek sys_lseek +17 common getpid sys_getpid +18 common getppid sys_getppid +19 common mount sys_mount +20 common umount2 sys_umount +21 common setuid sys_setuid +22 common getuid sys_getuid +23 common geteuid sys_geteuid +24 common ptrace sys_ptrace +25 common access sys_access +26 common sync sys_sync +27 common fsync sys_fsync +28 common fdatasync sys_fdatasync +29 common kill sys_kill +30 common rename sys_rename +31 common mkdir sys_mkdir +32 common rmdir sys_rmdir +33 common dup sys_dup +34 common pipe sys_ia64_pipe +35 common times sys_times +36 common brk ia64_brk +37 common setgid sys_setgid +38 common getgid sys_getgid +39 common getegid sys_getegid +40 common acct sys_acct +41 common ioctl sys_ioctl +42 common fcntl sys_fcntl +43 common umask sys_umask +44 common chroot sys_chroot +45 common ustat sys_ustat +46 common dup2 sys_dup2 +47 common setreuid sys_setreuid +48 common setregid sys_setregid +49 common getresuid sys_getresuid +50 common setresuid sys_setresuid +51 common getresgid sys_getresgid +52 common setresgid sys_setresgid +53 common getgroups sys_getgroups +54 common setgroups sys_setgroups +55 common getpgid sys_getpgid +56 common setpgid sys_setpgid +57 common setsid sys_setsid +58 common getsid sys_getsid +59 common sethostname sys_sethostname +60 common setrlimit sys_setrlimit +61 common getrlimit sys_getrlimit +62 common getrusage sys_getrusage +63 common gettimeofday sys_gettimeofday +64 common settimeofday sys_settimeofday +65 common select sys_select +66 common poll sys_poll +67 common symlink sys_symlink +68 common readlink sys_readlink +69 common uselib sys_uselib +70 common swapon sys_swapon +71 common swapoff sys_swapoff +72 common reboot sys_reboot +73 common truncate sys_truncate +74 common ftruncate sys_ftruncate +75 common fchmod sys_fchmod +76 common fchown sys_fchown +77 common getpriority ia64_getpriority +78 common setpriority sys_setpriority +79 common statfs sys_statfs +80 common fstatfs sys_fstatfs +81 common gettid sys_gettid +82 common semget sys_semget +83 common semop sys_semop +84 common semctl sys_semctl +85 common msgget sys_msgget +86 common msgsnd sys_msgsnd +87 common msgrcv sys_msgrcv +88 common msgctl sys_msgctl +89 common shmget sys_shmget +90 common shmat sys_shmat +91 common shmdt sys_shmdt +92 common shmctl sys_shmctl +93 common syslog sys_syslog +94 common setitimer sys_setitimer +95 common getitimer sys_getitimer +# 1120 was old_stat +# 1121 was old_lstat +# 1122 was old_fstat +99 common vhangup sys_vhangup +100 common lchown sys_lchown +101 common remap_file_pages sys_remap_file_pages +102 common wait4 sys_wait4 +103 common sysinfo sys_sysinfo +104 common clone sys_clone +105 common setdomainname sys_setdomainname +106 common uname sys_newuname +107 common adjtimex sys_adjtimex +# 1132 was create_module +109 common init_module sys_init_module +110 common delete_module sys_delete_module +# 1135 was get_kernel_syms +# 1136 was query_module +113 common quotactl sys_quotactl +114 common bdflush sys_ni_syscall +115 common sysfs sys_sysfs +116 common personality sys_personality +117 common afs_syscall sys_ni_syscall +118 common setfsuid sys_setfsuid +119 common setfsgid sys_setfsgid +120 common getdents sys_getdents +121 common flock sys_flock +122 common readv sys_readv +123 common writev sys_writev +124 common pread64 sys_pread64 +125 common pwrite64 sys_pwrite64 +126 common _sysctl sys_ni_syscall +127 common mmap sys_mmap +128 common munmap sys_munmap +129 common mlock sys_mlock +130 common mlockall sys_mlockall +131 common mprotect sys_mprotect +132 common mremap ia64_mremap +133 common msync sys_msync +134 common munlock sys_munlock +135 common munlockall sys_munlockall +136 common sched_getparam sys_sched_getparam +137 common sched_setparam sys_sched_setparam +138 common sched_getscheduler sys_sched_getscheduler +139 common sched_setscheduler sys_sched_setscheduler +140 common sched_yield sys_sched_yield +141 common sched_get_priority_max sys_sched_get_priority_max +142 common sched_get_priority_min sys_sched_get_priority_min +143 common sched_rr_get_interval sys_sched_rr_get_interval +144 common nanosleep sys_nanosleep +145 common nfsservctl sys_ni_syscall +146 common prctl sys_prctl +147 common old_getpagesize sys_getpagesize +148 common mmap2 sys_mmap2 +149 common pciconfig_read sys_pciconfig_read +150 common pciconfig_write sys_pciconfig_write +151 common perfmonctl sys_ni_syscall +152 common sigaltstack sys_sigaltstack +153 common rt_sigaction sys_rt_sigaction +154 common rt_sigpending sys_rt_sigpending +155 common rt_sigprocmask sys_rt_sigprocmask +156 common rt_sigqueueinfo sys_rt_sigqueueinfo +157 common rt_sigreturn sys_rt_sigreturn +158 common rt_sigsuspend sys_rt_sigsuspend +159 common rt_sigtimedwait sys_rt_sigtimedwait +160 common getcwd sys_getcwd +161 common capget sys_capget +162 common capset sys_capset +163 common sendfile sys_sendfile64 +164 common getpmsg sys_ni_syscall +165 common putpmsg sys_ni_syscall +166 common socket sys_socket +167 common bind sys_bind +168 common connect sys_connect +169 common listen sys_listen +170 common accept sys_accept +171 common getsockname sys_getsockname +172 common getpeername sys_getpeername +173 common socketpair sys_socketpair +174 common send sys_send +175 common sendto sys_sendto +176 common recv sys_recv +177 common recvfrom sys_recvfrom +178 common shutdown sys_shutdown +179 common setsockopt sys_setsockopt +180 common getsockopt sys_getsockopt +181 common sendmsg sys_sendmsg +182 common recvmsg sys_recvmsg +183 common pivot_root sys_pivot_root +184 common mincore sys_mincore +185 common madvise sys_madvise +186 common stat sys_newstat +187 common lstat sys_newlstat +188 common fstat sys_newfstat +189 common clone2 sys_clone2 +190 common getdents64 sys_getdents64 +191 common getunwind sys_getunwind +192 common readahead sys_readahead +193 common setxattr sys_setxattr +194 common lsetxattr sys_lsetxattr +195 common fsetxattr sys_fsetxattr +196 common getxattr sys_getxattr +197 common lgetxattr sys_lgetxattr +198 common fgetxattr sys_fgetxattr +199 common listxattr sys_listxattr +200 common llistxattr sys_llistxattr +201 common flistxattr sys_flistxattr +202 common removexattr sys_removexattr +203 common lremovexattr sys_lremovexattr +204 common fremovexattr sys_fremovexattr +205 common tkill sys_tkill +206 common futex sys_futex +207 common sched_setaffinity sys_sched_setaffinity +208 common sched_getaffinity sys_sched_getaffinity +209 common set_tid_address sys_set_tid_address +210 common fadvise64 sys_fadvise64_64 +211 common tgkill sys_tgkill +212 common exit_group sys_exit_group +213 common lookup_dcookie sys_ni_syscall +214 common io_setup sys_io_setup +215 common io_destroy sys_io_destroy +216 common io_getevents sys_io_getevents +217 common io_submit sys_io_submit +218 common io_cancel sys_io_cancel +219 common epoll_create sys_epoll_create +220 common epoll_ctl sys_epoll_ctl +221 common epoll_wait sys_epoll_wait +222 common restart_syscall sys_restart_syscall +223 common semtimedop sys_semtimedop +224 common timer_create sys_timer_create +225 common timer_settime sys_timer_settime +226 common timer_gettime sys_timer_gettime +227 common timer_getoverrun sys_timer_getoverrun +228 common timer_delete sys_timer_delete +229 common clock_settime sys_clock_settime +230 common clock_gettime sys_clock_gettime +231 common clock_getres ia64_clock_getres +232 common clock_nanosleep sys_clock_nanosleep +233 common fstatfs64 sys_fstatfs64 +234 common statfs64 sys_statfs64 +235 common mbind sys_mbind +236 common get_mempolicy sys_get_mempolicy +237 common set_mempolicy sys_set_mempolicy +238 common mq_open sys_mq_open +239 common mq_unlink sys_mq_unlink +240 common mq_timedsend sys_mq_timedsend +241 common mq_timedreceive sys_mq_timedreceive +242 common mq_notify sys_mq_notify +243 common mq_getsetattr sys_mq_getsetattr +244 common kexec_load sys_kexec_load +245 common vserver sys_ni_syscall +246 common waitid sys_waitid +247 common add_key sys_add_key +248 common request_key sys_request_key +249 common keyctl sys_keyctl +250 common ioprio_set sys_ioprio_set +251 common ioprio_get sys_ioprio_get +252 common move_pages sys_move_pages +253 common inotify_init sys_inotify_init +254 common inotify_add_watch sys_inotify_add_watch +255 common inotify_rm_watch sys_inotify_rm_watch +256 common migrate_pages sys_migrate_pages +257 common openat sys_openat +258 common mkdirat sys_mkdirat +259 common mknodat sys_mknodat +260 common fchownat sys_fchownat +261 common futimesat sys_futimesat +262 common newfstatat sys_newfstatat +263 common unlinkat sys_unlinkat +264 common renameat sys_renameat +265 common linkat sys_linkat +266 common symlinkat sys_symlinkat +267 common readlinkat sys_readlinkat +268 common fchmodat sys_fchmodat +269 common faccessat sys_faccessat +270 common pselect6 sys_pselect6 +271 common ppoll sys_ppoll +272 common unshare sys_unshare +273 common splice sys_splice +274 common set_robust_list sys_set_robust_list +275 common get_robust_list sys_get_robust_list +276 common sync_file_range sys_sync_file_range +277 common tee sys_tee +278 common vmsplice sys_vmsplice +279 common fallocate sys_fallocate +280 common getcpu sys_getcpu +281 common epoll_pwait sys_epoll_pwait +282 common utimensat sys_utimensat +283 common signalfd sys_signalfd +284 common timerfd sys_ni_syscall +285 common eventfd sys_eventfd +286 common timerfd_create sys_timerfd_create +287 common timerfd_settime sys_timerfd_settime +288 common timerfd_gettime sys_timerfd_gettime +289 common signalfd4 sys_signalfd4 +290 common eventfd2 sys_eventfd2 +291 common epoll_create1 sys_epoll_create1 +292 common dup3 sys_dup3 +293 common pipe2 sys_pipe2 +294 common inotify_init1 sys_inotify_init1 +295 common preadv sys_preadv +296 common pwritev sys_pwritev +297 common rt_tgsigqueueinfo sys_rt_tgsigqueueinfo +298 common recvmmsg sys_recvmmsg +299 common fanotify_init sys_fanotify_init +300 common fanotify_mark sys_fanotify_mark +301 common prlimit64 sys_prlimit64 +302 common name_to_handle_at sys_name_to_handle_at +303 common open_by_handle_at sys_open_by_handle_at +304 common clock_adjtime sys_clock_adjtime +305 common syncfs sys_syncfs +306 common setns sys_setns +307 common sendmmsg sys_sendmmsg +308 common process_vm_readv sys_process_vm_readv +309 common process_vm_writev sys_process_vm_writev +310 common accept4 sys_accept4 +311 common finit_module sys_finit_module +312 common sched_setattr sys_sched_setattr +313 common sched_getattr sys_sched_getattr +314 common renameat2 sys_renameat2 +315 common getrandom sys_getrandom +316 common memfd_create sys_memfd_create +317 common bpf sys_bpf +318 common execveat sys_execveat +319 common userfaultfd sys_userfaultfd +320 common membarrier sys_membarrier +321 common kcmp sys_kcmp +322 common mlock2 sys_mlock2 +323 common copy_file_range sys_copy_file_range +324 common preadv2 sys_preadv2 +325 common pwritev2 sys_pwritev2 +326 common statx sys_statx +327 common io_pgetevents sys_io_pgetevents +328 common perf_event_open sys_perf_event_open +329 common seccomp sys_seccomp +330 common pkey_mprotect sys_pkey_mprotect +331 common pkey_alloc sys_pkey_alloc +332 common pkey_free sys_pkey_free +333 common rseq sys_rseq +# 334 through 423 are reserved to sync up with other architectures +424 common pidfd_send_signal sys_pidfd_send_signal +425 common io_uring_setup sys_io_uring_setup +426 common io_uring_enter sys_io_uring_enter +427 common io_uring_register sys_io_uring_register +428 common open_tree sys_open_tree +429 common move_mount sys_move_mount +430 common fsopen sys_fsopen +431 common fsconfig sys_fsconfig +432 common fsmount sys_fsmount +433 common fspick sys_fspick +434 common pidfd_open sys_pidfd_open +# 435 reserved for clone3 +436 common close_range sys_close_range +437 common openat2 sys_openat2 +438 common pidfd_getfd sys_pidfd_getfd +439 common faccessat2 sys_faccessat2 +440 common process_madvise sys_process_madvise +441 common epoll_pwait2 sys_epoll_pwait2 +442 common mount_setattr sys_mount_setattr +443 common quotactl_fd sys_quotactl_fd +444 common landlock_create_ruleset sys_landlock_create_ruleset +445 common landlock_add_rule sys_landlock_add_rule +446 common landlock_restrict_self sys_landlock_restrict_self +# 447 reserved for memfd_secret +448 common process_mrelease sys_process_mrelease +449 common futex_waitv sys_futex_waitv +450 common set_mempolicy_home_node sys_set_mempolicy_home_node +451 common cachestat sys_cachestat +452 common fchmodat2 sys_fchmodat2 +453 common map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c new file mode 100644 index 000000000000..83ef044b63ef --- /dev/null +++ b/arch/ia64/kernel/time.c @@ -0,0 +1,463 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * linux/arch/ia64/kernel/time.c + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger + * Copyright (C) 1999 Don Dugger + * Copyright (C) 1999-2000 VA Linux Systems + * Copyright (C) 1999-2000 Walt Drummond + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "fsyscall_gtod_data.h" +#include "irq.h" + +static u64 itc_get_cycles(struct clocksource *cs); + +struct fsyscall_gtod_data_t fsyscall_gtod_data; + +struct itc_jitter_data_t itc_jitter_data; + +volatile int time_keeper_id = 0; /* smp_processor_id() of time-keeper */ + +#ifdef CONFIG_IA64_DEBUG_IRQ + +unsigned long last_cli_ip; +EXPORT_SYMBOL(last_cli_ip); + +#endif + +static struct clocksource clocksource_itc = { + .name = "itc", + .rating = 350, + .read = itc_get_cycles, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; +static struct clocksource *itc_clocksource; + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE + +#include + +extern u64 cycle_to_nsec(u64 cyc); + +void vtime_flush(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + u64 delta; + + if (ti->utime) + account_user_time(tsk, cycle_to_nsec(ti->utime)); + + if (ti->gtime) + account_guest_time(tsk, cycle_to_nsec(ti->gtime)); + + if (ti->idle_time) + account_idle_time(cycle_to_nsec(ti->idle_time)); + + if (ti->stime) { + delta = cycle_to_nsec(ti->stime); + account_system_index_time(tsk, delta, CPUTIME_SYSTEM); + } + + if (ti->hardirq_time) { + delta = cycle_to_nsec(ti->hardirq_time); + account_system_index_time(tsk, delta, CPUTIME_IRQ); + } + + if (ti->softirq_time) { + delta = cycle_to_nsec(ti->softirq_time); + account_system_index_time(tsk, delta, CPUTIME_SOFTIRQ); + } + + ti->utime = 0; + ti->gtime = 0; + ti->idle_time = 0; + ti->stime = 0; + ti->hardirq_time = 0; + ti->softirq_time = 0; +} + +/* + * Called from the context switch with interrupts disabled, to charge all + * accumulated times to the current process, and to prepare accounting on + * the next process. + */ +void arch_vtime_task_switch(struct task_struct *prev) +{ + struct thread_info *pi = task_thread_info(prev); + struct thread_info *ni = task_thread_info(current); + + ni->ac_stamp = pi->ac_stamp; + ni->ac_stime = ni->ac_utime = 0; +} + +/* + * Account time for a transition between system, hard irq or soft irq state. + * Note that this function is called with interrupts enabled. + */ +static __u64 vtime_delta(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + __u64 now, delta_stime; + + WARN_ON_ONCE(!irqs_disabled()); + + now = ia64_get_itc(); + delta_stime = now - ti->ac_stamp; + ti->ac_stamp = now; + + return delta_stime; +} + +void vtime_account_kernel(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + __u64 stime = vtime_delta(tsk); + + if (tsk->flags & PF_VCPU) + ti->gtime += stime; + else + ti->stime += stime; +} +EXPORT_SYMBOL_GPL(vtime_account_kernel); + +void vtime_account_idle(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + + ti->idle_time += vtime_delta(tsk); +} + +void vtime_account_softirq(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + + ti->softirq_time += vtime_delta(tsk); +} + +void vtime_account_hardirq(struct task_struct *tsk) +{ + struct thread_info *ti = task_thread_info(tsk); + + ti->hardirq_time += vtime_delta(tsk); +} + +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ + +static irqreturn_t +timer_interrupt (int irq, void *dev_id) +{ + unsigned long new_itm; + + if (cpu_is_offline(smp_processor_id())) { + return IRQ_HANDLED; + } + + new_itm = local_cpu_data->itm_next; + + if (!time_after(ia64_get_itc(), new_itm)) + printk(KERN_ERR "Oops: timer tick before it's due (itc=%lx,itm=%lx)\n", + ia64_get_itc(), new_itm); + + while (1) { + new_itm += local_cpu_data->itm_delta; + + legacy_timer_tick(smp_processor_id() == time_keeper_id); + + local_cpu_data->itm_next = new_itm; + + if (time_after(new_itm, ia64_get_itc())) + break; + + /* + * Allow IPIs to interrupt the timer loop. + */ + local_irq_enable(); + local_irq_disable(); + } + + do { + /* + * If we're too close to the next clock tick for + * comfort, we increase the safety margin by + * intentionally dropping the next tick(s). We do NOT + * update itm.next because that would force us to call + * xtime_update() which in turn would let our clock run + * too fast (with the potentially devastating effect + * of losing monotony of time). + */ + while (!time_after(new_itm, ia64_get_itc() + local_cpu_data->itm_delta/2)) + new_itm += local_cpu_data->itm_delta; + ia64_set_itm(new_itm); + /* double check, in case we got hit by a (slow) PMI: */ + } while (time_after_eq(ia64_get_itc(), new_itm)); + return IRQ_HANDLED; +} + +/* + * Encapsulate access to the itm structure for SMP. + */ +void +ia64_cpu_local_tick (void) +{ + int cpu = smp_processor_id(); + unsigned long shift = 0, delta; + + /* arrange for the cycle counter to generate a timer interrupt: */ + ia64_set_itv(IA64_TIMER_VECTOR); + + delta = local_cpu_data->itm_delta; + /* + * Stagger the timer tick for each CPU so they don't occur all at (almost) the + * same time: + */ + if (cpu) { + unsigned long hi = 1UL << ia64_fls(cpu); + shift = (2*(cpu - hi) + 1) * delta/hi/2; + } + local_cpu_data->itm_next = ia64_get_itc() + delta + shift; + ia64_set_itm(local_cpu_data->itm_next); +} + +static int nojitter; + +static int __init nojitter_setup(char *str) +{ + nojitter = 1; + printk("Jitter checking for ITC timers disabled\n"); + return 1; +} + +__setup("nojitter", nojitter_setup); + + +void ia64_init_itm(void) +{ + unsigned long platform_base_freq, itc_freq; + struct pal_freq_ratio itc_ratio, proc_ratio; + long status, platform_base_drift, itc_drift; + + /* + * According to SAL v2.6, we need to use a SAL call to determine the platform base + * frequency and then a PAL call to determine the frequency ratio between the ITC + * and the base frequency. + */ + status = ia64_sal_freq_base(SAL_FREQ_BASE_PLATFORM, + &platform_base_freq, &platform_base_drift); + if (status != 0) { + printk(KERN_ERR "SAL_FREQ_BASE_PLATFORM failed: %s\n", ia64_sal_strerror(status)); + } else { + status = ia64_pal_freq_ratios(&proc_ratio, NULL, &itc_ratio); + if (status != 0) + printk(KERN_ERR "PAL_FREQ_RATIOS failed with status=%ld\n", status); + } + if (status != 0) { + /* invent "random" values */ + printk(KERN_ERR + "SAL/PAL failed to obtain frequency info---inventing reasonable values\n"); + platform_base_freq = 100000000; + platform_base_drift = -1; /* no drift info */ + itc_ratio.num = 3; + itc_ratio.den = 1; + } + if (platform_base_freq < 40000000) { + printk(KERN_ERR "Platform base frequency %lu bogus---resetting to 75MHz!\n", + platform_base_freq); + platform_base_freq = 75000000; + platform_base_drift = -1; + } + if (!proc_ratio.den) + proc_ratio.den = 1; /* avoid division by zero */ + if (!itc_ratio.den) + itc_ratio.den = 1; /* avoid division by zero */ + + itc_freq = (platform_base_freq*itc_ratio.num)/itc_ratio.den; + + local_cpu_data->itm_delta = (itc_freq + HZ/2) / HZ; + printk(KERN_DEBUG "CPU %d: base freq=%lu.%03luMHz, ITC ratio=%u/%u, " + "ITC freq=%lu.%03luMHz", smp_processor_id(), + platform_base_freq / 1000000, (platform_base_freq / 1000) % 1000, + itc_ratio.num, itc_ratio.den, itc_freq / 1000000, (itc_freq / 1000) % 1000); + + if (platform_base_drift != -1) { + itc_drift = platform_base_drift*itc_ratio.num/itc_ratio.den; + printk("+/-%ldppm\n", itc_drift); + } else { + itc_drift = -1; + printk("\n"); + } + + local_cpu_data->proc_freq = (platform_base_freq*proc_ratio.num)/proc_ratio.den; + local_cpu_data->itc_freq = itc_freq; + local_cpu_data->cyc_per_usec = (itc_freq + USEC_PER_SEC/2) / USEC_PER_SEC; + local_cpu_data->nsec_per_cyc = ((NSEC_PER_SEC<itc_freq); + itc_clocksource = &clocksource_itc; + } +} + +static u64 itc_get_cycles(struct clocksource *cs) +{ + unsigned long lcycle, now, ret; + + if (!itc_jitter_data.itc_jitter) + return get_cycles(); + + lcycle = itc_jitter_data.itc_lastcycle; + now = get_cycles(); + if (lcycle && time_after(lcycle, now)) + return lcycle; + + /* + * Keep track of the last timer value returned. + * In an SMP environment, you could lose out in contention of + * cmpxchg. If so, your cmpxchg returns new value which the + * winner of contention updated to. Use the new value instead. + */ + ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, now); + if (unlikely(ret != lcycle)) + return ret; + + return now; +} + +void read_persistent_clock64(struct timespec64 *ts) +{ + efi_gettimeofday(ts); +} + +void __init +time_init (void) +{ + register_percpu_irq(IA64_TIMER_VECTOR, timer_interrupt, IRQF_IRQPOLL, + "timer"); + ia64_init_itm(); +} + +/* + * Generic udelay assumes that if preemption is allowed and the thread + * migrates to another CPU, that the ITC values are synchronized across + * all CPUs. + */ +static void +ia64_itc_udelay (unsigned long usecs) +{ + unsigned long start = ia64_get_itc(); + unsigned long end = start + usecs*local_cpu_data->cyc_per_usec; + + while (time_before(ia64_get_itc(), end)) + cpu_relax(); +} + +void (*ia64_udelay)(unsigned long usecs) = &ia64_itc_udelay; + +void +udelay (unsigned long usecs) +{ + (*ia64_udelay)(usecs); +} +EXPORT_SYMBOL(udelay); + +/* IA64 doesn't cache the timezone */ +void update_vsyscall_tz(void) +{ +} + +void update_vsyscall(struct timekeeper *tk) +{ + write_seqcount_begin(&fsyscall_gtod_data.seq); + + /* copy vsyscall data */ + fsyscall_gtod_data.clk_mask = tk->tkr_mono.mask; + fsyscall_gtod_data.clk_mult = tk->tkr_mono.mult; + fsyscall_gtod_data.clk_shift = tk->tkr_mono.shift; + fsyscall_gtod_data.clk_fsys_mmio = tk->tkr_mono.clock->archdata.fsys_mmio; + fsyscall_gtod_data.clk_cycle_last = tk->tkr_mono.cycle_last; + + fsyscall_gtod_data.wall_time.sec = tk->xtime_sec; + fsyscall_gtod_data.wall_time.snsec = tk->tkr_mono.xtime_nsec; + + fsyscall_gtod_data.monotonic_time.sec = tk->xtime_sec + + tk->wall_to_monotonic.tv_sec; + fsyscall_gtod_data.monotonic_time.snsec = tk->tkr_mono.xtime_nsec + + ((u64)tk->wall_to_monotonic.tv_nsec + << tk->tkr_mono.shift); + + /* normalize */ + while (fsyscall_gtod_data.monotonic_time.snsec >= + (((u64)NSEC_PER_SEC) << tk->tkr_mono.shift)) { + fsyscall_gtod_data.monotonic_time.snsec -= + ((u64)NSEC_PER_SEC) << tk->tkr_mono.shift; + fsyscall_gtod_data.monotonic_time.sec++; + } + + write_seqcount_end(&fsyscall_gtod_data.seq); +} + diff --git a/arch/ia64/kernel/topology.c b/arch/ia64/kernel/topology.c new file mode 100644 index 000000000000..94a848b06f15 --- /dev/null +++ b/arch/ia64/kernel/topology.c @@ -0,0 +1,410 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file contains NUMA specific variables and functions which are used on + * NUMA machines with contiguous memory. + * 2002/08/07 Erich Focht + * Populate cpu entries in sysfs for non-numa systems as well + * Intel Corporation - Ashok Raj + * 02/27/2006 Zhang, Yanmin + * Populate cpu cache entries in sysfs for cpu cache info + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static struct ia64_cpu *sysfs_cpus; + +void arch_fix_phys_package_id(int num, u32 slot) +{ +#ifdef CONFIG_SMP + if (cpu_data(num)->socket_id == -1) + cpu_data(num)->socket_id = slot; +#endif +} +EXPORT_SYMBOL_GPL(arch_fix_phys_package_id); + + +#ifdef CONFIG_HOTPLUG_CPU +int __ref arch_register_cpu(int num) +{ + /* + * If CPEI can be re-targeted or if this is not + * CPEI target, then it is hotpluggable + */ + if (can_cpei_retarget() || !is_cpu_cpei_target(num)) + sysfs_cpus[num].cpu.hotpluggable = 1; + map_cpu_to_node(num, node_cpuid[num].nid); + return register_cpu(&sysfs_cpus[num].cpu, num); +} +EXPORT_SYMBOL(arch_register_cpu); + +void __ref arch_unregister_cpu(int num) +{ + unregister_cpu(&sysfs_cpus[num].cpu); + unmap_cpu_from_node(num, cpu_to_node(num)); +} +EXPORT_SYMBOL(arch_unregister_cpu); +#else +static int __init arch_register_cpu(int num) +{ + return register_cpu(&sysfs_cpus[num].cpu, num); +} +#endif /*CONFIG_HOTPLUG_CPU*/ + + +static int __init topology_init(void) +{ + int i, err = 0; + + sysfs_cpus = kcalloc(NR_CPUS, sizeof(struct ia64_cpu), GFP_KERNEL); + if (!sysfs_cpus) + panic("kzalloc in topology_init failed - NR_CPUS too big?"); + + for_each_present_cpu(i) { + if((err = arch_register_cpu(i))) + goto out; + } +out: + return err; +} + +subsys_initcall(topology_init); + + +/* + * Export cpu cache information through sysfs + */ + +/* + * A bunch of string array to get pretty printing + */ +static const char *cache_types[] = { + "", /* not used */ + "Instruction", + "Data", + "Unified" /* unified */ +}; + +static const char *cache_mattrib[]={ + "WriteThrough", + "WriteBack", + "", /* reserved */ + "" /* reserved */ +}; + +struct cache_info { + pal_cache_config_info_t cci; + cpumask_t shared_cpu_map; + int level; + int type; + struct kobject kobj; +}; + +struct cpu_cache_info { + struct cache_info *cache_leaves; + int num_cache_leaves; + struct kobject kobj; +}; + +static struct cpu_cache_info all_cpu_cache_info[NR_CPUS]; +#define LEAF_KOBJECT_PTR(x,y) (&all_cpu_cache_info[x].cache_leaves[y]) + +#ifdef CONFIG_SMP +static void cache_shared_cpu_map_setup(unsigned int cpu, + struct cache_info * this_leaf) +{ + pal_cache_shared_info_t csi; + int num_shared, i = 0; + unsigned int j; + + if (cpu_data(cpu)->threads_per_core <= 1 && + cpu_data(cpu)->cores_per_socket <= 1) { + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + return; + } + + if (ia64_pal_cache_shared_info(this_leaf->level, + this_leaf->type, + 0, + &csi) != PAL_STATUS_SUCCESS) + return; + + num_shared = (int) csi.num_shared; + do { + for_each_possible_cpu(j) + if (cpu_data(cpu)->socket_id == cpu_data(j)->socket_id + && cpu_data(j)->core_id == csi.log1_cid + && cpu_data(j)->thread_id == csi.log1_tid) + cpumask_set_cpu(j, &this_leaf->shared_cpu_map); + + i++; + } while (i < num_shared && + ia64_pal_cache_shared_info(this_leaf->level, + this_leaf->type, + i, + &csi) == PAL_STATUS_SUCCESS); +} +#else +static void cache_shared_cpu_map_setup(unsigned int cpu, + struct cache_info * this_leaf) +{ + cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + return; +} +#endif + +static ssize_t show_coherency_line_size(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", 1 << this_leaf->cci.pcci_line_size); +} + +static ssize_t show_ways_of_associativity(struct cache_info *this_leaf, + char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->cci.pcci_assoc); +} + +static ssize_t show_attributes(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, + "%s\n", + cache_mattrib[this_leaf->cci.pcci_cache_attr]); +} + +static ssize_t show_size(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%uK\n", this_leaf->cci.pcci_cache_size / 1024); +} + +static ssize_t show_number_of_sets(struct cache_info *this_leaf, char *buf) +{ + unsigned number_of_sets = this_leaf->cci.pcci_cache_size; + number_of_sets /= this_leaf->cci.pcci_assoc; + number_of_sets /= 1 << this_leaf->cci.pcci_line_size; + + return sprintf(buf, "%u\n", number_of_sets); +} + +static ssize_t show_shared_cpu_map(struct cache_info *this_leaf, char *buf) +{ + cpumask_t shared_cpu_map; + + cpumask_and(&shared_cpu_map, + &this_leaf->shared_cpu_map, cpu_online_mask); + return scnprintf(buf, PAGE_SIZE, "%*pb\n", + cpumask_pr_args(&shared_cpu_map)); +} + +static ssize_t show_type(struct cache_info *this_leaf, char *buf) +{ + int type = this_leaf->type + this_leaf->cci.pcci_unified; + return sprintf(buf, "%s\n", cache_types[type]); +} + +static ssize_t show_level(struct cache_info *this_leaf, char *buf) +{ + return sprintf(buf, "%u\n", this_leaf->level); +} + +struct cache_attr { + struct attribute attr; + ssize_t (*show)(struct cache_info *, char *); + ssize_t (*store)(struct cache_info *, const char *, size_t count); +}; + +#ifdef define_one_ro + #undef define_one_ro +#endif +#define define_one_ro(_name) \ + static struct cache_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(level); +define_one_ro(type); +define_one_ro(coherency_line_size); +define_one_ro(ways_of_associativity); +define_one_ro(size); +define_one_ro(number_of_sets); +define_one_ro(shared_cpu_map); +define_one_ro(attributes); + +static struct attribute * cache_default_attrs[] = { + &type.attr, + &level.attr, + &coherency_line_size.attr, + &ways_of_associativity.attr, + &attributes.attr, + &size.attr, + &number_of_sets.attr, + &shared_cpu_map.attr, + NULL +}; +ATTRIBUTE_GROUPS(cache_default); + +#define to_object(k) container_of(k, struct cache_info, kobj) +#define to_attr(a) container_of(a, struct cache_attr, attr) + +static ssize_t ia64_cache_show(struct kobject * kobj, struct attribute * attr, char * buf) +{ + struct cache_attr *fattr = to_attr(attr); + struct cache_info *this_leaf = to_object(kobj); + ssize_t ret; + + ret = fattr->show ? fattr->show(this_leaf, buf) : 0; + return ret; +} + +static const struct sysfs_ops cache_sysfs_ops = { + .show = ia64_cache_show +}; + +static struct kobj_type cache_ktype = { + .sysfs_ops = &cache_sysfs_ops, + .default_groups = cache_default_groups, +}; + +static struct kobj_type cache_ktype_percpu_entry = { + .sysfs_ops = &cache_sysfs_ops, +}; + +static void cpu_cache_sysfs_exit(unsigned int cpu) +{ + kfree(all_cpu_cache_info[cpu].cache_leaves); + all_cpu_cache_info[cpu].cache_leaves = NULL; + all_cpu_cache_info[cpu].num_cache_leaves = 0; + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + return; +} + +static int cpu_cache_sysfs_init(unsigned int cpu) +{ + unsigned long i, levels, unique_caches; + pal_cache_config_info_t cci; + int j; + long status; + struct cache_info *this_cache; + int num_cache_leaves = 0; + + if ((status = ia64_pal_cache_summary(&levels, &unique_caches)) != 0) { + printk(KERN_ERR "ia64_pal_cache_summary=%ld\n", status); + return -1; + } + + this_cache=kcalloc(unique_caches, sizeof(struct cache_info), + GFP_KERNEL); + if (this_cache == NULL) + return -ENOMEM; + + for (i=0; i < levels; i++) { + for (j=2; j >0 ; j--) { + if ((status=ia64_pal_cache_config_info(i,j, &cci)) != + PAL_STATUS_SUCCESS) + continue; + + this_cache[num_cache_leaves].cci = cci; + this_cache[num_cache_leaves].level = i + 1; + this_cache[num_cache_leaves].type = j; + + cache_shared_cpu_map_setup(cpu, + &this_cache[num_cache_leaves]); + num_cache_leaves ++; + } + } + + all_cpu_cache_info[cpu].cache_leaves = this_cache; + all_cpu_cache_info[cpu].num_cache_leaves = num_cache_leaves; + + memset(&all_cpu_cache_info[cpu].kobj, 0, sizeof(struct kobject)); + + return 0; +} + +/* Add cache interface for CPU device */ +static int cache_add_dev(unsigned int cpu) +{ + struct device *sys_dev = get_cpu_device(cpu); + unsigned long i, j; + struct cache_info *this_object; + int retval = 0; + + if (all_cpu_cache_info[cpu].kobj.parent) + return 0; + + + retval = cpu_cache_sysfs_init(cpu); + if (unlikely(retval < 0)) + return retval; + + retval = kobject_init_and_add(&all_cpu_cache_info[cpu].kobj, + &cache_ktype_percpu_entry, &sys_dev->kobj, + "%s", "cache"); + if (unlikely(retval < 0)) { + cpu_cache_sysfs_exit(cpu); + return retval; + } + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) { + this_object = LEAF_KOBJECT_PTR(cpu,i); + retval = kobject_init_and_add(&(this_object->kobj), + &cache_ktype, + &all_cpu_cache_info[cpu].kobj, + "index%1lu", i); + if (unlikely(retval)) { + for (j = 0; j < i; j++) { + kobject_put(&(LEAF_KOBJECT_PTR(cpu,j)->kobj)); + } + kobject_put(&all_cpu_cache_info[cpu].kobj); + cpu_cache_sysfs_exit(cpu); + return retval; + } + kobject_uevent(&(this_object->kobj), KOBJ_ADD); + } + kobject_uevent(&all_cpu_cache_info[cpu].kobj, KOBJ_ADD); + return retval; +} + +/* Remove cache interface for CPU device */ +static int cache_remove_dev(unsigned int cpu) +{ + unsigned long i; + + for (i = 0; i < all_cpu_cache_info[cpu].num_cache_leaves; i++) + kobject_put(&(LEAF_KOBJECT_PTR(cpu,i)->kobj)); + + if (all_cpu_cache_info[cpu].kobj.parent) { + kobject_put(&all_cpu_cache_info[cpu].kobj); + memset(&all_cpu_cache_info[cpu].kobj, + 0, + sizeof(struct kobject)); + } + + cpu_cache_sysfs_exit(cpu); + + return 0; +} + +static int __init cache_sysfs_init(void) +{ + int ret; + + ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "ia64/topology:online", + cache_add_dev, cache_remove_dev); + WARN_ON(ret < 0); + return 0; +} +device_initcall(cache_sysfs_init); diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c new file mode 100644 index 000000000000..53735b1d1be3 --- /dev/null +++ b/arch/ia64/kernel/traps.c @@ -0,0 +1,612 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Architecture-specific trap handling. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 05/12/00 grao : added isr in siginfo for SIGFPE + */ + +#include +#include +#include +#include +#include +#include /* For unblank_screen() */ +#include +#include +#include +#include +#include /* for ssleep() */ +#include +#include + +#include +#include +#include +#include +#include + +fpswa_interface_t *fpswa_interface; +EXPORT_SYMBOL(fpswa_interface); + +void __init +trap_init (void) +{ + if (ia64_boot_param->fpswa) + /* FPSWA fixup: make the interface pointer a kernel virtual address: */ + fpswa_interface = __va(ia64_boot_param->fpswa); +} + +int +die (const char *str, struct pt_regs *regs, long err) +{ + static struct { + spinlock_t lock; + u32 lock_owner; + int lock_owner_depth; + } die = { + .lock = __SPIN_LOCK_UNLOCKED(die.lock), + .lock_owner = -1, + .lock_owner_depth = 0 + }; + static int die_counter; + int cpu = get_cpu(); + + if (die.lock_owner != cpu) { + console_verbose(); + spin_lock_irq(&die.lock); + die.lock_owner = cpu; + die.lock_owner_depth = 0; + bust_spinlocks(1); + } + put_cpu(); + + if (++die.lock_owner_depth < 3) { + printk("%s[%d]: %s %ld [%d]\n", + current->comm, task_pid_nr(current), str, err, ++die_counter); + if (notify_die(DIE_OOPS, str, regs, err, 255, SIGSEGV) + != NOTIFY_STOP) + show_regs(regs); + else + regs = NULL; + } else + printk(KERN_ERR "Recursive die() failure, output suppressed\n"); + + bust_spinlocks(0); + die.lock_owner = -1; + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + spin_unlock_irq(&die.lock); + + if (!regs) + return 1; + + if (panic_on_oops) + panic("Fatal exception"); + + make_task_dead(SIGSEGV); + return 0; +} + +int +die_if_kernel (char *str, struct pt_regs *regs, long err) +{ + if (!user_mode(regs)) + return die(str, regs, err); + return 0; +} + +void +__kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) +{ + int sig, code; + + switch (break_num) { + case 0: /* unknown error (used by GCC for __builtin_abort()) */ + if (notify_die(DIE_BREAK, "break 0", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) + return; + if (die_if_kernel("bugcheck!", regs, break_num)) + return; + sig = SIGILL; code = ILL_ILLOPC; + break; + + case 1: /* integer divide by zero */ + sig = SIGFPE; code = FPE_INTDIV; + break; + + case 2: /* integer overflow */ + sig = SIGFPE; code = FPE_INTOVF; + break; + + case 3: /* range check/bounds check */ + sig = SIGFPE; code = FPE_FLTSUB; + break; + + case 4: /* null pointer dereference */ + sig = SIGSEGV; code = SEGV_MAPERR; + break; + + case 5: /* misaligned data */ + sig = SIGSEGV; code = BUS_ADRALN; + break; + + case 6: /* decimal overflow */ + sig = SIGFPE; code = __FPE_DECOVF; + break; + + case 7: /* decimal divide by zero */ + sig = SIGFPE; code = __FPE_DECDIV; + break; + + case 8: /* packed decimal error */ + sig = SIGFPE; code = __FPE_DECERR; + break; + + case 9: /* invalid ASCII digit */ + sig = SIGFPE; code = __FPE_INVASC; + break; + + case 10: /* invalid decimal digit */ + sig = SIGFPE; code = __FPE_INVDEC; + break; + + case 11: /* paragraph stack overflow */ + sig = SIGSEGV; code = __SEGV_PSTKOVF; + break; + + case 0x3f000 ... 0x3ffff: /* bundle-update in progress */ + sig = SIGILL; code = __ILL_BNDMOD; + break; + + default: + if ((break_num < 0x40000 || break_num > 0x100000) + && die_if_kernel("Bad break", regs, break_num)) + return; + + if (break_num < 0x80000) { + sig = SIGILL; code = __ILL_BREAK; + } else { + if (notify_die(DIE_BREAK, "bad break", regs, break_num, TRAP_BRKPT, SIGTRAP) + == NOTIFY_STOP) + return; + sig = SIGTRAP; code = TRAP_BRKPT; + } + } + force_sig_fault(sig, code, + (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), + break_num, 0 /* clear __ISR_VALID */, 0); +} + +/* + * disabled_fph_fault() is called when a user-level process attempts to access f32..f127 + * and it doesn't own the fp-high register partition. When this happens, we save the + * current fph partition in the task_struct of the fpu-owner (if necessary) and then load + * the fp-high partition of the current task (if necessary). Note that the kernel has + * access to fph by the time we get here, as the IVT's "Disabled FP-Register" handler takes + * care of clearing psr.dfh. + */ +static inline void +disabled_fph_fault (struct pt_regs *regs) +{ + struct ia64_psr *psr = ia64_psr(regs); + + /* first, grant user-level access to fph partition: */ + psr->dfh = 0; + + /* + * Make sure that no other task gets in on this processor + * while we're claiming the FPU + */ + preempt_disable(); +#ifndef CONFIG_SMP + { + struct task_struct *fpu_owner + = (struct task_struct *)ia64_get_kr(IA64_KR_FPU_OWNER); + + if (ia64_is_local_fpu_owner(current)) { + preempt_enable_no_resched(); + return; + } + + if (fpu_owner) + ia64_flush_fph(fpu_owner); + } +#endif /* !CONFIG_SMP */ + ia64_set_local_fpu_owner(current); + if ((current->thread.flags & IA64_THREAD_FPH_VALID) != 0) { + __ia64_load_fpu(current->thread.fph); + psr->mfh = 0; + } else { + __ia64_init_fpu(); + /* + * Set mfh because the state in thread.fph does not match the state in + * the fph partition. + */ + psr->mfh = 1; + } + preempt_enable_no_resched(); +} + +static inline int +fp_emulate (int fp_fault, void *bundle, long *ipsr, long *fpsr, long *isr, long *pr, long *ifs, + struct pt_regs *regs) +{ + fp_state_t fp_state; + fpswa_ret_t ret; + + if (!fpswa_interface) + return -1; + + memset(&fp_state, 0, sizeof(fp_state_t)); + + /* + * compute fp_state. only FP registers f6 - f11 are used by the + * kernel, so set those bits in the mask and set the low volatile + * pointer to point to these registers. + */ + fp_state.bitmask_low64 = 0xfc0; /* bit6..bit11 */ + + fp_state.fp_state_low_volatile = (fp_state_low_volatile_t *) ®s->f6; + /* + * unsigned long (*EFI_FPSWA) ( + * unsigned long trap_type, + * void *Bundle, + * unsigned long *pipsr, + * unsigned long *pfsr, + * unsigned long *pisr, + * unsigned long *ppreds, + * unsigned long *pifs, + * void *fp_state); + */ + ret = (*fpswa_interface->fpswa)((unsigned long) fp_fault, bundle, + (unsigned long *) ipsr, (unsigned long *) fpsr, + (unsigned long *) isr, (unsigned long *) pr, + (unsigned long *) ifs, &fp_state); + + return ret.status; +} + +struct fpu_swa_msg { + unsigned long count; + unsigned long time; +}; +static DEFINE_PER_CPU(struct fpu_swa_msg, cpulast); +DECLARE_PER_CPU(struct fpu_swa_msg, cpulast); +static struct fpu_swa_msg last __cacheline_aligned; + + +/* + * Handle floating-point assist faults and traps. + */ +static int +handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) +{ + long exception, bundle[2]; + unsigned long fault_ip; + + fault_ip = regs->cr_iip; + if (!fp_fault && (ia64_psr(regs)->ri == 0)) + fault_ip -= 16; + if (copy_from_user(bundle, (void __user *) fault_ip, sizeof(bundle))) + return -1; + + if (!(current->thread.flags & IA64_THREAD_FPEMU_NOPRINT)) { + unsigned long count, current_jiffies = jiffies; + struct fpu_swa_msg *cp = this_cpu_ptr(&cpulast); + + if (unlikely(current_jiffies > cp->time)) + cp->count = 0; + if (unlikely(cp->count < 5)) { + cp->count++; + cp->time = current_jiffies + 5 * HZ; + + /* minimize races by grabbing a copy of count BEFORE checking last.time. */ + count = last.count; + barrier(); + + /* + * Lower 4 bits are used as a count. Upper bits are a sequence + * number that is updated when count is reset. The cmpxchg will + * fail is seqno has changed. This minimizes multiple cpus + * resetting the count. + */ + if (current_jiffies > last.time) + (void) cmpxchg_acq(&last.count, count, 16 + (count & ~15)); + + /* used fetchadd to atomically update the count */ + if ((last.count & 15) < 5 && (ia64_fetchadd(1, &last.count, acq) & 15) < 5) { + last.time = current_jiffies + 5 * HZ; + printk(KERN_WARNING + "%s(%d): floating-point assist fault at ip %016lx, isr %016lx\n", + current->comm, task_pid_nr(current), regs->cr_iip + ia64_psr(regs)->ri, isr); + } + } + } + + exception = fp_emulate(fp_fault, bundle, ®s->cr_ipsr, ®s->ar_fpsr, &isr, ®s->pr, + ®s->cr_ifs, regs); + if (fp_fault) { + if (exception == 0) { + /* emulation was successful */ + ia64_increment_ip(regs); + } else if (exception == -1) { + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); + return -1; + } else { + /* is next instruction a trap? */ + int si_code; + + if (exception & 2) { + ia64_increment_ip(regs); + } + si_code = FPE_FLTUNK; /* default code */ + if (isr & 0x11) { + si_code = FPE_FLTINV; + } else if (isr & 0x22) { + /* denormal operand gets the same si_code as underflow + * see arch/i386/kernel/traps.c:math_error() */ + si_code = FPE_FLTUND; + } else if (isr & 0x44) { + si_code = FPE_FLTDIV; + } + force_sig_fault(SIGFPE, si_code, + (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), + 0, __ISR_VALID, isr); + } + } else { + if (exception == -1) { + printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); + return -1; + } else if (exception != 0) { + /* raise exception */ + int si_code; + + si_code = FPE_FLTUNK; /* default code */ + if (isr & 0x880) { + si_code = FPE_FLTOVF; + } else if (isr & 0x1100) { + si_code = FPE_FLTUND; + } else if (isr & 0x2200) { + si_code = FPE_FLTRES; + } + force_sig_fault(SIGFPE, si_code, + (void __user *) (regs->cr_iip + ia64_psr(regs)->ri), + 0, __ISR_VALID, isr); + } + } + return 0; +} + +struct illegal_op_return { + unsigned long fkt, arg1, arg2, arg3; +}; + +struct illegal_op_return +ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, + long arg4, long arg5, long arg6, long arg7, + struct pt_regs regs) +{ + struct illegal_op_return rv; + char buf[128]; + +#ifdef CONFIG_IA64_BRL_EMU + { + extern struct illegal_op_return ia64_emulate_brl (struct pt_regs *, unsigned long); + + rv = ia64_emulate_brl(®s, ec); + if (rv.fkt != (unsigned long) -1) + return rv; + } +#endif + + sprintf(buf, "IA-64 Illegal operation fault"); + rv.fkt = 0; + if (die_if_kernel(buf, ®s, 0)) + return rv; + + force_sig_fault(SIGILL, ILL_ILLOPC, + (void __user *) (regs.cr_iip + ia64_psr(®s)->ri), + 0, 0, 0); + return rv; +} + +void __kprobes +ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, + unsigned long iim, unsigned long itir, long arg5, long arg6, + long arg7, struct pt_regs regs) +{ + unsigned long code, error = isr, iip; + char buf[128]; + int result, sig, si_code; + static const char *reason[] = { + "IA-64 Illegal Operation fault", + "IA-64 Privileged Operation fault", + "IA-64 Privileged Register fault", + "IA-64 Reserved Register/Field fault", + "Disabled Instruction Set Transition fault", + "Unknown fault 5", "Unknown fault 6", "Unknown fault 7", "Illegal Hazard fault", + "Unknown fault 9", "Unknown fault 10", "Unknown fault 11", "Unknown fault 12", + "Unknown fault 13", "Unknown fault 14", "Unknown fault 15" + }; + + if ((isr & IA64_ISR_NA) && ((isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) { + /* + * This fault was due to lfetch.fault, set "ed" bit in the psr to cancel + * the lfetch. + */ + ia64_psr(®s)->ed = 1; + return; + } + + iip = regs.cr_iip + ia64_psr(®s)->ri; + + switch (vector) { + case 24: /* General Exception */ + code = (isr >> 4) & 0xf; + sprintf(buf, "General Exception: %s%s", reason[code], + (code == 3) ? ((isr & (1UL << 37)) + ? " (RSE access)" : " (data access)") : ""); + if (code == 8) { +# ifdef CONFIG_IA64_PRINT_HAZARDS + printk("%s[%d]: possible hazard @ ip=%016lx (pr = %016lx)\n", + current->comm, task_pid_nr(current), + regs.cr_iip + ia64_psr(®s)->ri, regs.pr); +# endif + return; + } + break; + + case 25: /* Disabled FP-Register */ + if (isr & 2) { + disabled_fph_fault(®s); + return; + } + sprintf(buf, "Disabled FPL fault---not supposed to happen!"); + break; + + case 26: /* NaT Consumption */ + if (user_mode(®s)) { + void __user *addr; + + if (((isr >> 4) & 0xf) == 2) { + /* NaT page consumption */ + sig = SIGSEGV; + code = SEGV_ACCERR; + addr = (void __user *) ifa; + } else { + /* register NaT consumption */ + sig = SIGILL; + code = ILL_ILLOPN; + addr = (void __user *) (regs.cr_iip + + ia64_psr(®s)->ri); + } + force_sig_fault(sig, code, addr, + vector, __ISR_VALID, isr); + return; + } else if (ia64_done_with_exception(®s)) + return; + sprintf(buf, "NaT consumption"); + break; + + case 31: /* Unsupported Data Reference */ + if (user_mode(®s)) { + force_sig_fault(SIGILL, ILL_ILLOPN, (void __user *) iip, + vector, __ISR_VALID, isr); + return; + } + sprintf(buf, "Unsupported data reference"); + break; + + case 29: /* Debug */ + case 35: /* Taken Branch Trap */ + case 36: /* Single Step Trap */ + if (fsys_mode(current, ®s)) { + extern char __kernel_syscall_via_break[]; + /* + * Got a trap in fsys-mode: Taken Branch Trap + * and Single Step trap need special handling; + * Debug trap is ignored (we disable it here + * and re-enable it in the lower-privilege trap). + */ + if (unlikely(vector == 29)) { + set_thread_flag(TIF_DB_DISABLED); + ia64_psr(®s)->db = 0; + ia64_psr(®s)->lp = 1; + return; + } + /* re-do the system call via break 0x100000: */ + regs.cr_iip = (unsigned long) __kernel_syscall_via_break; + ia64_psr(®s)->ri = 0; + ia64_psr(®s)->cpl = 3; + return; + } + switch (vector) { + default: + case 29: + si_code = TRAP_HWBKPT; +#ifdef CONFIG_ITANIUM + /* + * Erratum 10 (IFA may contain incorrect address) now has + * "NoFix" status. There are no plans for fixing this. + */ + if (ia64_psr(®s)->is == 0) + ifa = regs.cr_iip; +#endif + break; + case 35: si_code = TRAP_BRANCH; ifa = 0; break; + case 36: si_code = TRAP_TRACE; ifa = 0; break; + } + if (notify_die(DIE_FAULT, "ia64_fault", ®s, vector, si_code, SIGTRAP) + == NOTIFY_STOP) + return; + force_sig_fault(SIGTRAP, si_code, (void __user *) ifa, + 0, __ISR_VALID, isr); + return; + + case 32: /* fp fault */ + case 33: /* fp trap */ + result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); + if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { + force_sig_fault(SIGFPE, FPE_FLTINV, (void __user *) iip, + 0, __ISR_VALID, isr); + } + return; + + case 34: + if (isr & 0x2) { + /* Lower-Privilege Transfer Trap */ + + /* If we disabled debug traps during an fsyscall, + * re-enable them here. + */ + if (test_thread_flag(TIF_DB_DISABLED)) { + clear_thread_flag(TIF_DB_DISABLED); + ia64_psr(®s)->db = 1; + } + + /* + * Just clear PSR.lp and then return immediately: + * all the interesting work (e.g., signal delivery) + * is done in the kernel exit path. + */ + ia64_psr(®s)->lp = 0; + return; + } else { + /* Unimplemented Instr. Address Trap */ + if (user_mode(®s)) { + force_sig_fault(SIGILL, ILL_BADIADDR, + (void __user *) iip, + 0, 0, 0); + return; + } + sprintf(buf, "Unimplemented Instruction Address fault"); + } + break; + + case 45: + printk(KERN_ERR "Unexpected IA-32 exception (Trap 45)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx\n", + iip, ifa, isr); + force_sig(SIGSEGV); + return; + + case 46: + printk(KERN_ERR "Unexpected IA-32 intercept trap (Trap 46)\n"); + printk(KERN_ERR " iip - 0x%lx, ifa - 0x%lx, isr - 0x%lx, iim - 0x%lx\n", + iip, ifa, isr, iim); + force_sig(SIGSEGV); + return; + + case 47: + sprintf(buf, "IA-32 Interruption Fault (int 0x%lx)", isr >> 16); + break; + + default: + sprintf(buf, "Fault %lu", vector); + break; + } + if (!die_if_kernel(buf, ®s, error)) + force_sig(SIGILL); +} diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c new file mode 100644 index 000000000000..0acb5a0cd7ab --- /dev/null +++ b/arch/ia64/kernel/unaligned.c @@ -0,0 +1,1560 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Architecture-specific unaligned trap handling. + * + * Copyright (C) 1999-2002, 2004 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + * + * 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix + * get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame + * stacked register returns an undefined value; it does NOT trigger a + * "rsvd register fault"). + * 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops. + * 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes. + * 2001/01/17 Add support emulation of unaligned kernel accesses. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +extern int die_if_kernel(char *str, struct pt_regs *regs, long err); + +#undef DEBUG_UNALIGNED_TRAP + +#ifdef DEBUG_UNALIGNED_TRAP +# define DPRINT(a...) do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0) +# define DDUMP(str,vp,len) dump(str, vp, len) + +static void +dump (const char *str, void *vp, size_t len) +{ + unsigned char *cp = vp; + int i; + + printk("%s", str); + for (i = 0; i < len; ++i) + printk (" %02x", *cp++); + printk("\n"); +} +#else +# define DPRINT(a...) +# define DDUMP(str,vp,len) +#endif + +#define IA64_FIRST_STACKED_GR 32 +#define IA64_FIRST_ROTATING_FR 32 +#define SIGN_EXT9 0xffffffffffffff00ul + +/* + * sysctl settable hook which tells the kernel whether to honor the + * IA64_THREAD_UAC_NOPRINT prctl. Because this is user settable, we want + * to allow the super user to enable/disable this for security reasons + * (i.e. don't allow attacker to fill up logs with unaligned accesses). + */ +int no_unaligned_warning; +int unaligned_dump_stack; + +/* + * For M-unit: + * + * opcode | m | x6 | + * --------|------|---------| + * [40-37] | [36] | [35:30] | + * --------|------|---------| + * 4 | 1 | 6 | = 11 bits + * -------------------------- + * However bits [31:30] are not directly useful to distinguish between + * load/store so we can use [35:32] instead, which gives the following + * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer + * checking the m-bit until later in the load/store emulation. + */ +#define IA64_OPCODE_MASK 0x1ef +#define IA64_OPCODE_SHIFT 32 + +/* + * Table C-28 Integer Load/Store + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill MUST be aligned because the RNATs are based on + * the address (bits [8:3]), so we must failed. + */ +#define LD_OP 0x080 +#define LDS_OP 0x081 +#define LDA_OP 0x082 +#define LDSA_OP 0x083 +#define LDBIAS_OP 0x084 +#define LDACQ_OP 0x085 +/* 0x086, 0x087 are not relevant */ +#define LDCCLR_OP 0x088 +#define LDCNC_OP 0x089 +#define LDCCLRACQ_OP 0x08a +#define ST_OP 0x08c +#define STREL_OP 0x08d +/* 0x08e,0x8f are not relevant */ + +/* + * Table C-29 Integer Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-30 Integer Load/Store +Imm + * + * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF + * + * ld8.fill, st8.fill must be aligned because the Nat register are based on + * the address, so we must fail and the program must be fixed. + */ +#define LD_IMM_OP 0x0a0 +#define LDS_IMM_OP 0x0a1 +#define LDA_IMM_OP 0x0a2 +#define LDSA_IMM_OP 0x0a3 +#define LDBIAS_IMM_OP 0x0a4 +#define LDACQ_IMM_OP 0x0a5 +/* 0x0a6, 0xa7 are not relevant */ +#define LDCCLR_IMM_OP 0x0a8 +#define LDCNC_IMM_OP 0x0a9 +#define LDCCLRACQ_IMM_OP 0x0aa +#define ST_IMM_OP 0x0ac +#define STREL_IMM_OP 0x0ad +/* 0x0ae,0xaf are not relevant */ + +/* + * Table C-32 Floating-point Load/Store + */ +#define LDF_OP 0x0c0 +#define LDFS_OP 0x0c1 +#define LDFA_OP 0x0c2 +#define LDFSA_OP 0x0c3 +/* 0x0c6 is irrelevant */ +#define LDFCCLR_OP 0x0c8 +#define LDFCNC_OP 0x0c9 +/* 0x0cb is irrelevant */ +#define STF_OP 0x0cc + +/* + * Table C-33 Floating-point Load +Reg + * + * we use the ld->m (bit [36:36]) field to determine whether or not we have + * a load/store of this form. + */ + +/* + * Table C-34 Floating-point Load/Store +Imm + */ +#define LDF_IMM_OP 0x0e0 +#define LDFS_IMM_OP 0x0e1 +#define LDFA_IMM_OP 0x0e2 +#define LDFSA_IMM_OP 0x0e3 +/* 0x0e6 is irrelevant */ +#define LDFCCLR_IMM_OP 0x0e8 +#define LDFCNC_IMM_OP 0x0e9 +#define STF_IMM_OP 0x0ec + +typedef struct { + unsigned long qp:6; /* [0:5] */ + unsigned long r1:7; /* [6:12] */ + unsigned long imm:7; /* [13:19] */ + unsigned long r3:7; /* [20:26] */ + unsigned long x:1; /* [27:27] */ + unsigned long hint:2; /* [28:29] */ + unsigned long x6_sz:2; /* [30:31] */ + unsigned long x6_op:4; /* [32:35], x6 = x6_sz|x6_op */ + unsigned long m:1; /* [36:36] */ + unsigned long op:4; /* [37:40] */ + unsigned long pad:23; /* [41:63] */ +} load_store_t; + + +typedef enum { + UPD_IMMEDIATE, /* ldXZ r1=[r3],imm(9) */ + UPD_REG /* ldXZ r1=[r3],r2 */ +} update_t; + +/* + * We use tables to keep track of the offsets of registers in the saved state. + * This way we save having big switch/case statements. + * + * We use bit 0 to indicate switch_stack or pt_regs. + * The offset is simply shifted by 1 bit. + * A 2-byte value should be enough to hold any kind of offset + * + * In case the calling convention changes (and thus pt_regs/switch_stack) + * simply use RSW instead of RPT or vice-versa. + */ + +#define RPO(x) ((size_t) &((struct pt_regs *)0)->x) +#define RSO(x) ((size_t) &((struct switch_stack *)0)->x) + +#define RPT(x) (RPO(x) << 1) +#define RSW(x) (1| RSO(x)<<1) + +#define GR_OFFS(x) (gr_info[x]>>1) +#define GR_IN_SW(x) (gr_info[x] & 0x1) + +#define FR_OFFS(x) (fr_info[x]>>1) +#define FR_IN_SW(x) (fr_info[x] & 0x1) + +static u16 gr_info[32]={ + 0, /* r0 is read-only : WE SHOULD NEVER GET THIS */ + + RPT(r1), RPT(r2), RPT(r3), + + RSW(r4), RSW(r5), RSW(r6), RSW(r7), + + RPT(r8), RPT(r9), RPT(r10), RPT(r11), + RPT(r12), RPT(r13), RPT(r14), RPT(r15), + + RPT(r16), RPT(r17), RPT(r18), RPT(r19), + RPT(r20), RPT(r21), RPT(r22), RPT(r23), + RPT(r24), RPT(r25), RPT(r26), RPT(r27), + RPT(r28), RPT(r29), RPT(r30), RPT(r31) +}; + +static u16 fr_info[32]={ + 0, /* constant : WE SHOULD NEVER GET THIS */ + 0, /* constant : WE SHOULD NEVER GET THIS */ + + RSW(f2), RSW(f3), RSW(f4), RSW(f5), + + RPT(f6), RPT(f7), RPT(f8), RPT(f9), + RPT(f10), RPT(f11), + + RSW(f12), RSW(f13), RSW(f14), + RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19), + RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24), + RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29), + RSW(f30), RSW(f31) +}; + +/* Invalidate ALAT entry for integer register REGNO. */ +static void +invala_gr (int regno) +{ +# define F(reg) case reg: ia64_invala_gr(reg); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +/* Invalidate ALAT entry for floating-point register REGNO. */ +static void +invala_fr (int regno) +{ +# define F(reg) case reg: ia64_invala_fr(reg); break + + switch (regno) { + F( 0); F( 1); F( 2); F( 3); F( 4); F( 5); F( 6); F( 7); + F( 8); F( 9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15); + F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23); + F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31); + F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39); + F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47); + F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55); + F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63); + F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71); + F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79); + F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87); + F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95); + F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103); + F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111); + F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119); + F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127); + } +# undef F +} + +static inline unsigned long +rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg) +{ + reg += rrb; + if (reg >= sor) + reg -= sor; + return reg; +} + +static void +set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long *bsp, *bspstore, *addr, *rnat_addr, *ubs_end; + unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; + unsigned long rnats, nat_mask; + unsigned long on_kbs; + long sof = (regs->cr_ifs) & 0x7f; + long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx >= sof) { + /* this should never happen, as the "rsvd register fault" has higher priority */ + DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof); + return; + } + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", + r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); + addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); + if (addr >= kbs) { + /* the register is on the kernel backing store: easy... */ + rnat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) rnat_addr >= sw->ar_bspstore) + rnat_addr = &sw->ar_rnat; + nat_mask = 1UL << ia64_rse_slot_num(addr); + + *addr = val; + if (nat) + *rnat_addr |= nat_mask; + else + *rnat_addr &= ~nat_mask; + return; + } + + if (!user_stack(current, regs)) { + DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1); + return; + } + + bspstore = (unsigned long *)regs->ar_bspstore; + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -sof); + addr = ia64_rse_skip_regs(bsp, ridx); + + DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); + + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); + + rnat_addr = ia64_rse_rnat_addr(addr); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n", + (void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1); + + nat_mask = 1UL << ia64_rse_slot_num(addr); + if (nat) + rnats |= nat_mask; + else + rnats &= ~nat_mask; + ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats); + + DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats); +} + + +static void +get_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long *val, int *nat) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long *bsp, *addr, *rnat_addr, *ubs_end, *bspstore; + unsigned long *kbs = (void *) current + IA64_RBS_OFFSET; + unsigned long rnats, nat_mask; + unsigned long on_kbs; + long sof = (regs->cr_ifs) & 0x7f; + long sor = 8 * ((regs->cr_ifs >> 14) & 0xf); + long rrb_gr = (regs->cr_ifs >> 18) & 0x7f; + long ridx = r1 - 32; + + if (ridx >= sof) { + /* read of out-of-frame register returns an undefined value; 0 in our case. */ + DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof); + goto fail; + } + + if (ridx < sor) + ridx = rotate_reg(sor, rrb_gr, ridx); + + DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n", + r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx); + + on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore); + addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx); + if (addr >= kbs) { + /* the register is on the kernel backing store: easy... */ + *val = *addr; + if (nat) { + rnat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) rnat_addr >= sw->ar_bspstore) + rnat_addr = &sw->ar_rnat; + nat_mask = 1UL << ia64_rse_slot_num(addr); + *nat = (*rnat_addr & nat_mask) != 0; + } + return; + } + + if (!user_stack(current, regs)) { + DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1); + goto fail; + } + + bspstore = (unsigned long *)regs->ar_bspstore; + ubs_end = ia64_rse_skip_regs(bspstore, on_kbs); + bsp = ia64_rse_skip_regs(ubs_end, -sof); + addr = ia64_rse_skip_regs(bsp, ridx); + + DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void *) ubs_end, (void *) bsp, (void *) addr); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val); + + if (nat) { + rnat_addr = ia64_rse_rnat_addr(addr); + nat_mask = 1UL << ia64_rse_slot_num(addr); + + DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats); + + ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats); + *nat = (rnats & nat_mask) != 0; + } + return; + + fail: + *val = 0; + if (nat) + *nat = 0; + return; +} + + +static void +setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr; + unsigned long bitmask; + unsigned long *unat; + + /* + * First takes care of stacked registers + */ + if (regnum >= IA64_FIRST_STACKED_GR) { + set_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * Using r0 as a target raises a General Exception fault which has higher priority + * than the Unaligned Reference fault. + */ + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if (GR_IN_SW(regnum)) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n", + addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum)); + /* + * add offset from base of struct + * and do it ! + */ + addr += GR_OFFS(regnum); + + *(unsigned long *)addr = val; + + /* + * We need to clear the corresponding UNAT bit to fully emulate the load + * UNAT bit_pos = GR[r3]{8:3} form EAS-2.4 + */ + bitmask = 1UL << (addr >> 3 & 0x3f); + DPRINT("*0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void *) unat, *unat); + if (nat) { + *unat |= bitmask; + } else { + *unat &= ~bitmask; + } + DPRINT("*0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void *) unat,*unat); +} + +/* + * Return the (rotated) index for floating point register REGNUM (REGNUM must be in the + * range from 32-127, result is in the range from 0-95. + */ +static inline unsigned long +fph_index (struct pt_regs *regs, long regnum) +{ + unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f; + return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR)); +} + +static void +setfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *)regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than Unaligned + * Fault. Thus, when we get here, we know the partition is enabled. + * To update f32-f127, there are three choices: + * + * (1) save f32-f127 to thread.fph and update the values there + * (2) use a gigantic switch statement to directly access the registers + * (3) generate code on the fly to update the desired register + * + * For now, we are using approach (1). + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_sync_fph(current); + current->thread.fph[fph_index(regs, regnum)] = *fpval; + } else { + /* + * pt_regs or switch_stack ? + */ + if (FR_IN_SW(regnum)) { + addr = (unsigned long)sw; + } else { + addr = (unsigned long)regs; + } + + DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum)); + + addr += FR_OFFS(regnum); + *(struct ia64_fpreg *)addr = *fpval; + + /* + * mark the low partition as being used now + * + * It is highly unlikely that this bit is not already set, but + * let's do it for safety. + */ + regs->cr_ipsr |= IA64_PSR_MFL; + } +} + +/* + * Those 2 inline functions generate the spilled versions of the constant floating point + * registers which can be used with stfX + */ +static inline void +float_spill_f0 (struct ia64_fpreg *final) +{ + ia64_stf_spill(final, 0); +} + +static inline void +float_spill_f1 (struct ia64_fpreg *final) +{ + ia64_stf_spill(final, 1); +} + +static void +getfpreg (unsigned long regnum, struct ia64_fpreg *fpval, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr; + + /* + * From EAS-2.5: FPDisableFault has higher priority than + * Unaligned Fault. Thus, when we get here, we know the partition is + * enabled. + * + * When regnum > 31, the register is still live and we need to force a save + * to current->thread.fph to get access to it. See discussion in setfpreg() + * for reasons and other ways of doing this. + */ + if (regnum >= IA64_FIRST_ROTATING_FR) { + ia64_flush_fph(current); + *fpval = current->thread.fph[fph_index(regs, regnum)]; + } else { + /* + * f0 = 0.0, f1= 1.0. Those registers are constant and are thus + * not saved, we must generate their spilled form on the fly + */ + switch(regnum) { + case 0: + float_spill_f0(fpval); + break; + case 1: + float_spill_f1(fpval); + break; + default: + /* + * pt_regs or switch_stack ? + */ + addr = FR_IN_SW(regnum) ? (unsigned long)sw + : (unsigned long)regs; + + DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n", + FR_IN_SW(regnum), addr, FR_OFFS(regnum)); + + addr += FR_OFFS(regnum); + *fpval = *(struct ia64_fpreg *)addr; + } + } +} + + +static void +getreg (unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs) +{ + struct switch_stack *sw = (struct switch_stack *) regs - 1; + unsigned long addr, *unat; + + if (regnum >= IA64_FIRST_STACKED_GR) { + get_rse_reg(regs, regnum, val, nat); + return; + } + + /* + * take care of r0 (read-only always evaluate to 0) + */ + if (regnum == 0) { + *val = 0; + if (nat) + *nat = 0; + return; + } + + /* + * Now look at registers in [0-31] range and init correct UNAT + */ + if (GR_IN_SW(regnum)) { + addr = (unsigned long)sw; + unat = &sw->ar_unat; + } else { + addr = (unsigned long)regs; + unat = &sw->caller_unat; + } + + DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum)); + + addr += GR_OFFS(regnum); + + *val = *(unsigned long *)addr; + + /* + * do it only when requested + */ + if (nat) + *nat = (*unat >> (addr >> 3 & 0x3f)) & 0x1UL; +} + +static void +emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa) +{ + /* + * IMPORTANT: + * Given the way we handle unaligned speculative loads, we should + * not get to this point in the code but we keep this sanity check, + * just in case. + */ + if (ld.x6_op == 1 || ld.x6_op == 3) { + printk(KERN_ERR "%s: register update on speculative load, error\n", __func__); + if (die_if_kernel("unaligned reference on speculative load with register update\n", + regs, 30)) + return; + } + + + /* + * at this point, we know that the base register to update is valid i.e., + * it's not r0 + */ + if (type == UPD_IMMEDIATE) { + unsigned long imm; + + /* + * Load +Imm: ldXZ r1=[r3],imm(9) + * + * + * form imm9: [13:19] contain the first 7 bits + */ + imm = ld.x << 7 | ld.imm; + + /* + * sign extend (1+8bits) if m set + */ + if (ld.m) imm |= SIGN_EXT9; + + /* + * ifa == r3 and we know that the NaT bit on r3 was clear so + * we can directly use ifa. + */ + ifa += imm; + + setreg(ld.r3, ifa, 0, regs); + + DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa); + + } else if (ld.m) { + unsigned long r2; + int nat_r2; + + /* + * Load +Reg Opcode: ldXZ r1=[r3],r2 + * + * Note: that we update r3 even in the case of ldfX.a + * (where the load does not happen) + * + * The way the load algorithm works, we know that r3 does not + * have its NaT bit set (would have gotten NaT consumption + * before getting the unaligned fault). So we can use ifa + * which equals r3 at this point. + * + * IMPORTANT: + * The above statement holds ONLY because we know that we + * never reach this code when trying to do a ldX.s. + * If we ever make it to here on an ldfX.s then + */ + getreg(ld.imm, &r2, &nat_r2, regs); + + ifa += r2; + + /* + * propagate Nat r2 -> r3 + */ + setreg(ld.r3, ifa, nat_r2, regs); + + DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2); + } +} + +static int emulate_store(unsigned long ifa, void *val, int len, bool kernel_mode) +{ + if (kernel_mode) + return copy_to_kernel_nofault((void *)ifa, val, len); + + return copy_to_user((void __user *)ifa, val, len); +} + +static int emulate_load(void *val, unsigned long ifa, int len, bool kernel_mode) +{ + if (kernel_mode) + return copy_from_kernel_nofault(val, (void *)ifa, len); + + return copy_from_user(val, (void __user *)ifa, len); +} + +static int +emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs, + bool kernel_mode) +{ + unsigned int len = 1 << ld.x6_sz; + unsigned long val = 0; + + /* + * r0, as target, doesn't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * ldX.a we will emulate load and also invalidate the ALAT entry. + * See comment below for explanation on how we handle ldX.a + */ + + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; + } + /* this assumes little-endian byte-order: */ + if (emulate_load(&val, ifa, len, kernel_mode)) + return -1; + setreg(ld.r1, val, 0, regs); + + /* + * check for updates on any kind of loads + */ + if (ld.op == 0x5 || ld.m) + emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); + + /* + * handling of various loads (based on EAS2.4): + * + * ldX.acq (ordered load): + * - acquire semantics would have been used, so force fence instead. + * + * ldX.c.clr (check load and clear): + * - if we get to this handler, it's because the entry was not in the ALAT. + * Therefore the operation reverts to a normal load + * + * ldX.c.nc (check load no clear): + * - same as previous one + * + * ldX.c.clr.acq (ordered check load and clear): + * - same as above for c.clr part. The load needs to have acquire semantics. So + * we use the fence semantics which is stronger and thus ensures correctness. + * + * ldX.a (advanced load): + * - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the + * address doesn't match requested size alignment. This means that we would + * possibly need more than one load to get the result. + * + * The load part can be handled just like a normal load, however the difficult + * part is to get the right thing into the ALAT. The critical piece of information + * in the base address of the load & size. To do that, a ld.a must be executed, + * clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now + * if we use the same target register, we will be okay for the check.a instruction. + * If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry + * which would overlap within [r3,r3+X] (the size of the load was store in the + * ALAT). If such an entry is found the entry is invalidated. But this is not good + * enough, take the following example: + * r3=3 + * ld4.a r1=[r3] + * + * Could be emulated by doing: + * ld1.a r1=[r3],1 + * store to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3],1 + * store & shift to temporary; + * ld1.a r1=[r3] + * store & shift to temporary; + * r1=temporary + * + * So in this case, you would get the right value is r1 but the wrong info in + * the ALAT. Notice that you could do it in reverse to finish with address 3 + * but you would still get the size wrong. To get the size right, one needs to + * execute exactly the same kind of load. You could do it from a aligned + * temporary location, but you would get the address wrong. + * + * So no matter what, it is not possible to emulate an advanced load + * correctly. But is that really critical ? + * + * We will always convert ld.a into a normal load with ALAT invalidated. This + * will enable compiler to do optimization where certain code path after ld.a + * is not required to have ld.c/chk.a, e.g., code path with no intervening stores. + * + * If there is a store after the advanced load, one must either do a ld.c.* or + * chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no + * entry found in ALAT), and that's perfectly ok because: + * + * - ld.c.*, if the entry is not present a normal load is executed + * - chk.a.*, if the entry is not present, execution jumps to recovery code + * + * In either case, the load can be potentially retried in another form. + * + * ALAT must be invalidated for the register (so that chk.a or ld.c don't pick + * up a stale entry later). The register base update MUST also be performed. + */ + + /* + * when the load has the .acq completer then + * use ordering fence. + */ + if (ld.x6_op == 0x5 || ld.x6_op == 0xa) + mb(); + + /* + * invalidate ALAT entry in case of advanced load + */ + if (ld.x6_op == 0x2) + invala_gr(ld.r1); + + return 0; +} + +static int +emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs, + bool kernel_mode) +{ + unsigned long r2; + unsigned int len = 1 << ld.x6_sz; + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getreg(ld.imm, &r2, NULL, regs); + + /* + * we rely on the macros in unaligned.h for now i.e., + * we let the compiler figure out how to read memory gracefully. + * + * We need this switch/case because the way the inline function + * works. The code is optimized by the compiler and looks like + * a single switch/case. + */ + DPRINT("st%d [%lx]=%lx\n", len, ifa, r2); + + if (len != 2 && len != 4 && len != 8) { + DPRINT("unknown size: x6=%d\n", ld.x6_sz); + return -1; + } + + /* this assumes little-endian byte-order: */ + if (emulate_store(ifa, &r2, len, kernel_mode)) + return -1; + + /* + * stX [r3]=r2,imm(9) + * + * NOTE: + * ld.r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if (ld.op == 0x5) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld.x << 7 | ld.r1; + /* + * sign extend (8bits) if m set + */ + if (ld.m) imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT("imm=%lx r3=%lx\n", imm, ifa); + + setreg(ld.r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + /* + * stX.rel: use fence instead of release + */ + if (ld.x6_op == 0xd) + mb(); + + return 0; +} + +/* + * floating point operations sizes in bytes + */ +static const unsigned char float_fsz[4]={ + 10, /* extended precision (e) */ + 8, /* integer (8) */ + 4, /* single precision (s) */ + 8 /* double precision (d) */ +}; + +static inline void +mem2float_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfe(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf8(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_single (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfs(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +mem2float_double (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldfd(6, init); + ia64_stop(); + ia64_stf_spill(final, 6); +} + +static inline void +float2mem_extended (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfe(final, 6); +} + +static inline void +float2mem_integer (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stf8(final, 6); +} + +static inline void +float2mem_single (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfs(final, 6); +} + +static inline void +float2mem_double (struct ia64_fpreg *init, struct ia64_fpreg *final) +{ + ia64_ldf_fill(6, init); + ia64_stop(); + ia64_stfd(final, 6); +} + +static int +emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs, bool kernel_mode) +{ + struct ia64_fpreg fpr_init[2]; + struct ia64_fpreg fpr_final[2]; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction faults have + * higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an unaligned + * reference. + */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init, 0, sizeof(fpr_init)); + memset(&fpr_final, 0, sizeof(fpr_final)); + + /* + * ldfpX.a: we don't try to emulate anything but we must + * invalidate the ALAT entry and execute updates, if any. + */ + if (ld.x6_op != 0x2) { + /* + * This assumes little-endian byte-order. Note that there is no "ldfpe" + * instruction: + */ + if (emulate_load(&fpr_init[0], ifa, len, kernel_mode) + || emulate_load(&fpr_init[1], (ifa + len), len, kernel_mode)) + return -1; + + DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz); + DDUMP("frp_init =", &fpr_init, 2*len); + /* + * XXX fixme + * Could optimize inlines by using ldfpX & 2 spills + */ + switch( ld.x6_sz ) { + case 0: + mem2float_extended(&fpr_init[0], &fpr_final[0]); + mem2float_extended(&fpr_init[1], &fpr_final[1]); + break; + case 1: + mem2float_integer(&fpr_init[0], &fpr_final[0]); + mem2float_integer(&fpr_init[1], &fpr_final[1]); + break; + case 2: + mem2float_single(&fpr_init[0], &fpr_final[0]); + mem2float_single(&fpr_init[1], &fpr_final[1]); + break; + case 3: + mem2float_double(&fpr_init[0], &fpr_final[0]); + mem2float_double(&fpr_init[1], &fpr_final[1]); + break; + } + DDUMP("fpr_final =", &fpr_final, 2*len); + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final and directly + * use the storage from the saved context i.e., the actual final + * destination (pt_regs, switch_stack or thread structure). + */ + setfpreg(ld.r1, &fpr_final[0], regs); + setfpreg(ld.imm, &fpr_final[1], regs); + } + + /* + * Check for updates: only immediate updates are available for this + * instruction. + */ + if (ld.m) { + /* + * the immediate is implicit given the ldsz of the operation: + * single: 8 (2x4) and for all others it's 16 (2x8) + */ + ifa += len<<1; + + /* + * IMPORTANT: + * the fact that we force the NaT of r3 to zero is ONLY valid + * as long as we don't come here with a ldfpX.s. + * For this reason we keep this sanity check + */ + if (ld.x6_op == 1 || ld.x6_op == 3) + printk(KERN_ERR "%s: register update on speculative load pair, error\n", + __func__); + + setreg(ld.r3, ifa, 0, regs); + } + + /* + * Invalidate ALAT entries, if any, for both registers. + */ + if (ld.x6_op == 0x2) { + invala_fr(ld.r1); + invala_fr(ld.imm); + } + return 0; +} + + +static int +emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs, + bool kernel_mode) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * fr0 & fr1 don't need to be checked because Illegal Instruction + * faults have higher priority than unaligned faults. + * + * r0 cannot be found as the base as it would never generate an + * unaligned reference. + */ + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * ldfX.a we don't try to emulate anything but we must + * invalidate the ALAT entry. + * See comments in ldX for descriptions on how the various loads are handled. + */ + if (ld.x6_op != 0x2) { + if (emulate_load(&fpr_init, ifa, len, kernel_mode)) + return -1; + + DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); + DDUMP("fpr_init =", &fpr_init, len); + /* + * we only do something for x6_op={0,8,9} + */ + switch( ld.x6_sz ) { + case 0: + mem2float_extended(&fpr_init, &fpr_final); + break; + case 1: + mem2float_integer(&fpr_init, &fpr_final); + break; + case 2: + mem2float_single(&fpr_init, &fpr_final); + break; + case 3: + mem2float_double(&fpr_init, &fpr_final); + break; + } + DDUMP("fpr_final =", &fpr_final, len); + /* + * XXX fixme + * + * A possible optimization would be to drop fpr_final and directly + * use the storage from the saved context i.e., the actual final + * destination (pt_regs, switch_stack or thread structure). + */ + setfpreg(ld.r1, &fpr_final, regs); + } + + /* + * check for updates on any loads + */ + if (ld.op == 0x7 || ld.m) + emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa); + + /* + * invalidate ALAT entry in case of advanced floating point loads + */ + if (ld.x6_op == 0x2) + invala_fr(ld.r1); + + return 0; +} + + +static int +emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs, + bool kernel_mode) +{ + struct ia64_fpreg fpr_init; + struct ia64_fpreg fpr_final; + unsigned long len = float_fsz[ld.x6_sz]; + + /* + * make sure we get clean buffers + */ + memset(&fpr_init,0, sizeof(fpr_init)); + memset(&fpr_final,0, sizeof(fpr_final)); + + /* + * if we get to this handler, Nat bits on both r3 and r2 have already + * been checked. so we don't need to do it + * + * extract the value to be stored + */ + getfpreg(ld.imm, &fpr_init, regs); + /* + * during this step, we extract the spilled registers from the saved + * context i.e., we refill. Then we store (no spill) to temporary + * aligned location + */ + switch( ld.x6_sz ) { + case 0: + float2mem_extended(&fpr_init, &fpr_final); + break; + case 1: + float2mem_integer(&fpr_init, &fpr_final); + break; + case 2: + float2mem_single(&fpr_init, &fpr_final); + break; + case 3: + float2mem_double(&fpr_init, &fpr_final); + break; + } + DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz); + DDUMP("fpr_init =", &fpr_init, len); + DDUMP("fpr_final =", &fpr_final, len); + + if (emulate_store(ifa, &fpr_final, len, kernel_mode)) + return -1; + + /* + * stfX [r3]=r2,imm(9) + * + * NOTE: + * ld.r3 can never be r0, because r0 would not generate an + * unaligned access. + */ + if (ld.op == 0x7) { + unsigned long imm; + + /* + * form imm9: [12:6] contain first 7bits + */ + imm = ld.x << 7 | ld.r1; + /* + * sign extend (8bits) if m set + */ + if (ld.m) + imm |= SIGN_EXT9; + /* + * ifa == r3 (NaT is necessarily cleared) + */ + ifa += imm; + + DPRINT("imm=%lx r3=%lx\n", imm, ifa); + + setreg(ld.r3, ifa, 0, regs); + } + /* + * we don't have alat_invalidate_multiple() so we need + * to do the complete flush :-<< + */ + ia64_invala(); + + return 0; +} + +/* + * Make sure we log the unaligned access, so that user/sysadmin can notice it and + * eventually fix the program. However, we don't want to do that for every access so we + * pace it with jiffies. + */ +static DEFINE_RATELIMIT_STATE(logging_rate_limit, 5 * HZ, 5); + +void +ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) +{ + struct ia64_psr *ipsr = ia64_psr(regs); + unsigned long bundle[2]; + unsigned long opcode; + const struct exception_table_entry *eh = NULL; + union { + unsigned long l; + load_store_t insn; + } u; + int ret = -1; + bool kernel_mode = false; + + if (ia64_psr(regs)->be) { + /* we don't support big-endian accesses */ + if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0)) + return; + goto force_sigbus; + } + + /* + * Treat kernel accesses for which there is an exception handler entry the same as + * user-level unaligned accesses. Otherwise, a clever program could trick this + * handler into reading an arbitrary kernel addresses... + */ + if (!user_mode(regs)) + eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri); + if (user_mode(regs) || eh) { + if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0) + goto force_sigbus; + + if (!no_unaligned_warning && + !(current->thread.flags & IA64_THREAD_UAC_NOPRINT) && + __ratelimit(&logging_rate_limit)) + { + char buf[200]; /* comm[] is at most 16 bytes... */ + size_t len; + + len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, " + "ip=0x%016lx\n\r", current->comm, + task_pid_nr(current), + ifa, regs->cr_iip + ipsr->ri); + /* + * Don't call tty_write_message() if we're in the kernel; we might + * be holding locks... + */ + if (user_mode(regs)) { + struct tty_struct *tty = get_current_tty(); + tty_write_message(tty, buf); + tty_kref_put(tty); + } + buf[len-1] = '\0'; /* drop '\r' */ + /* watch for command names containing %s */ + printk(KERN_WARNING "%s", buf); + } else { + if (no_unaligned_warning) { + printk_once(KERN_WARNING "%s(%d) encountered an " + "unaligned exception which required\n" + "kernel assistance, which degrades " + "the performance of the application.\n" + "Unaligned exception warnings have " + "been disabled by the system " + "administrator\n" + "echo 0 > /proc/sys/kernel/ignore-" + "unaligned-usertrap to re-enable\n", + current->comm, task_pid_nr(current)); + } + } + } else { + if (__ratelimit(&logging_rate_limit)) { + printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n", + ifa, regs->cr_iip + ipsr->ri); + if (unaligned_dump_stack) + dump_stack(); + } + kernel_mode = true; + } + + DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n", + regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it); + + if (emulate_load(bundle, regs->cr_iip, 16, kernel_mode)) + goto failure; + + /* + * extract the instruction from the bundle given the slot number + */ + switch (ipsr->ri) { + default: + case 0: u.l = (bundle[0] >> 5); break; + case 1: u.l = (bundle[0] >> 46) | (bundle[1] << 18); break; + case 2: u.l = (bundle[1] >> 23); break; + } + opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK; + + DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d " + "ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm, + u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op); + + /* + * IMPORTANT: + * Notice that the switch statement DOES not cover all possible instructions + * that DO generate unaligned references. This is made on purpose because for some + * instructions it DOES NOT make sense to try and emulate the access. Sometimes it + * is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e., + * the program will get a signal and die: + * + * load/store: + * - ldX.spill + * - stX.spill + * Reason: RNATs are based on addresses + * - ld16 + * - st16 + * Reason: ld16 and st16 are supposed to occur in a single + * memory op + * + * synchronization: + * - cmpxchg + * - fetchadd + * - xchg + * Reason: ATOMIC operations cannot be emulated properly using multiple + * instructions. + * + * speculative loads: + * - ldX.sZ + * Reason: side effects, code must be ready to deal with failure so simpler + * to let the load fail. + * --------------------------------------------------------------------------------- + * XXX fixme + * + * I would like to get rid of this switch case and do something + * more elegant. + */ + switch (opcode) { + case LDS_OP: + case LDSA_OP: + if (u.insn.x) + /* oops, really a semaphore op (cmpxchg, etc) */ + goto failure; + fallthrough; + case LDS_IMM_OP: + case LDSA_IMM_OP: + case LDFS_OP: + case LDFSA_OP: + case LDFS_IMM_OP: + /* + * The instruction will be retried with deferred exceptions turned on, and + * we should get Nat bit installed + * + * IMPORTANT: When PSR_ED is set, the register & immediate update forms + * are actually executed even though the operation failed. So we don't + * need to take care of this. + */ + DPRINT("forcing PSR_ED\n"); + regs->cr_ipsr |= IA64_PSR_ED; + goto done; + + case LD_OP: + case LDA_OP: + case LDBIAS_OP: + case LDACQ_OP: + case LDCCLR_OP: + case LDCNC_OP: + case LDCCLRACQ_OP: + if (u.insn.x) + /* oops, really a semaphore op (cmpxchg, etc) */ + goto failure; + fallthrough; + case LD_IMM_OP: + case LDA_IMM_OP: + case LDBIAS_IMM_OP: + case LDACQ_IMM_OP: + case LDCCLR_IMM_OP: + case LDCNC_IMM_OP: + case LDCCLRACQ_IMM_OP: + ret = emulate_load_int(ifa, u.insn, regs, kernel_mode); + break; + + case ST_OP: + case STREL_OP: + if (u.insn.x) + /* oops, really a semaphore op (cmpxchg, etc) */ + goto failure; + fallthrough; + case ST_IMM_OP: + case STREL_IMM_OP: + ret = emulate_store_int(ifa, u.insn, regs, kernel_mode); + break; + + case LDF_OP: + case LDFA_OP: + case LDFCCLR_OP: + case LDFCNC_OP: + if (u.insn.x) + ret = emulate_load_floatpair(ifa, u.insn, regs, kernel_mode); + else + ret = emulate_load_float(ifa, u.insn, regs, kernel_mode); + break; + + case LDF_IMM_OP: + case LDFA_IMM_OP: + case LDFCCLR_IMM_OP: + case LDFCNC_IMM_OP: + ret = emulate_load_float(ifa, u.insn, regs, kernel_mode); + break; + + case STF_OP: + case STF_IMM_OP: + ret = emulate_store_float(ifa, u.insn, regs, kernel_mode); + break; + + default: + goto failure; + } + DPRINT("ret=%d\n", ret); + if (ret) + goto failure; + + if (ipsr->ri == 2) + /* + * given today's architecture this case is not likely to happen because a + * memory access instruction (M) can never be in the last slot of a + * bundle. But let's keep it for now. + */ + regs->cr_iip += 16; + ipsr->ri = (ipsr->ri + 1) & 0x3; + + DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip); + done: + return; + + failure: + /* something went wrong... */ + if (!user_mode(regs)) { + if (eh) { + ia64_handle_exception(regs, eh); + goto done; + } + if (die_if_kernel("error during unaligned kernel access\n", regs, ret)) + return; + /* NOT_REACHED */ + } + force_sigbus: + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) ifa, + 0, 0, 0); + goto done; +} diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c new file mode 100644 index 000000000000..a0fec82c56b8 --- /dev/null +++ b/arch/ia64/kernel/uncached.c @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2001-2008 Silicon Graphics, Inc. All rights reserved. + * + * A simple uncached page allocator using the generic allocator. This + * allocator first utilizes the spare (spill) pages found in the EFI + * memmap and will then start converting cached pages to uncached ones + * at a granule at a time. Node awareness is implemented by having a + * pool of pages per node. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct uncached_pool { + struct gen_pool *pool; + struct mutex add_chunk_mutex; /* serialize adding a converted chunk */ + int nchunks_added; /* #of converted chunks added to pool */ + atomic_t status; /* smp called function's return status*/ +}; + +#define MAX_CONVERTED_CHUNKS_PER_NODE 2 + +struct uncached_pool uncached_pools[MAX_NUMNODES]; + + +static void uncached_ipi_visibility(void *data) +{ + int status; + struct uncached_pool *uc_pool = (struct uncached_pool *)data; + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + if ((status != PAL_VISIBILITY_OK) && + (status != PAL_VISIBILITY_OK_REMOTE_NEEDED)) + atomic_inc(&uc_pool->status); +} + + +static void uncached_ipi_mc_drain(void *data) +{ + int status; + struct uncached_pool *uc_pool = (struct uncached_pool *)data; + + status = ia64_pal_mc_drain(); + if (status != PAL_STATUS_SUCCESS) + atomic_inc(&uc_pool->status); +} + + +/* + * Add a new chunk of uncached memory pages to the specified pool. + * + * @pool: pool to add new chunk of uncached memory to + * @nid: node id of node to allocate memory from, or -1 + * + * This is accomplished by first allocating a granule of cached memory pages + * and then converting them to uncached memory pages. + */ +static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid) +{ + struct page *page; + int status, i, nchunks_added = uc_pool->nchunks_added; + unsigned long c_addr, uc_addr; + + if (mutex_lock_interruptible(&uc_pool->add_chunk_mutex) != 0) + return -1; /* interrupted by a signal */ + + if (uc_pool->nchunks_added > nchunks_added) { + /* someone added a new chunk while we were waiting */ + mutex_unlock(&uc_pool->add_chunk_mutex); + return 0; + } + + if (uc_pool->nchunks_added >= MAX_CONVERTED_CHUNKS_PER_NODE) { + mutex_unlock(&uc_pool->add_chunk_mutex); + return -1; + } + + /* attempt to allocate a granule's worth of cached memory pages */ + + page = __alloc_pages_node(nid, + GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE, + IA64_GRANULE_SHIFT-PAGE_SHIFT); + if (!page) { + mutex_unlock(&uc_pool->add_chunk_mutex); + return -1; + } + + /* convert the memory pages from cached to uncached */ + + c_addr = (unsigned long)page_address(page); + uc_addr = c_addr - PAGE_OFFSET + __IA64_UNCACHED_OFFSET; + + /* + * There's a small race here where it's possible for someone to + * access the page through /dev/mem halfway through the conversion + * to uncached - not sure it's really worth bothering about + */ + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) + SetPageUncached(&page[i]); + + flush_tlb_kernel_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); + + status = ia64_pal_prefetch_visibility(PAL_VISIBILITY_PHYSICAL); + if (status == PAL_VISIBILITY_OK_REMOTE_NEEDED) { + atomic_set(&uc_pool->status, 0); + smp_call_function(uncached_ipi_visibility, uc_pool, 1); + if (atomic_read(&uc_pool->status)) + goto failed; + } else if (status != PAL_VISIBILITY_OK) + goto failed; + + preempt_disable(); + + flush_icache_range(uc_addr, uc_addr + IA64_GRANULE_SIZE); + + /* flush the just introduced uncached translation from the TLB */ + local_flush_tlb_all(); + + preempt_enable(); + + status = ia64_pal_mc_drain(); + if (status != PAL_STATUS_SUCCESS) + goto failed; + atomic_set(&uc_pool->status, 0); + smp_call_function(uncached_ipi_mc_drain, uc_pool, 1); + if (atomic_read(&uc_pool->status)) + goto failed; + + /* + * The chunk of memory pages has been converted to uncached so now we + * can add it to the pool. + */ + status = gen_pool_add(uc_pool->pool, uc_addr, IA64_GRANULE_SIZE, nid); + if (status) + goto failed; + + uc_pool->nchunks_added++; + mutex_unlock(&uc_pool->add_chunk_mutex); + return 0; + + /* failed to convert or add the chunk so give it back to the kernel */ +failed: + for (i = 0; i < (IA64_GRANULE_SIZE / PAGE_SIZE); i++) + ClearPageUncached(&page[i]); + + free_pages(c_addr, IA64_GRANULE_SHIFT-PAGE_SHIFT); + mutex_unlock(&uc_pool->add_chunk_mutex); + return -1; +} + + +/* + * uncached_alloc_page + * + * @starting_nid: node id of node to start with, or -1 + * @n_pages: number of contiguous pages to allocate + * + * Allocate the specified number of contiguous uncached pages on the + * requested node. If not enough contiguous uncached pages are available + * on the requested node, roundrobin starting with the next higher node. + */ +unsigned long uncached_alloc_page(int starting_nid, int n_pages) +{ + unsigned long uc_addr; + struct uncached_pool *uc_pool; + int nid; + + if (unlikely(starting_nid >= MAX_NUMNODES)) + return 0; + + if (starting_nid < 0) + starting_nid = numa_node_id(); + nid = starting_nid; + + do { + if (!node_state(nid, N_HIGH_MEMORY)) + continue; + uc_pool = &uncached_pools[nid]; + if (uc_pool->pool == NULL) + continue; + do { + uc_addr = gen_pool_alloc(uc_pool->pool, + n_pages * PAGE_SIZE); + if (uc_addr != 0) + return uc_addr; + } while (uncached_add_chunk(uc_pool, nid) == 0); + + } while ((nid = (nid + 1) % MAX_NUMNODES) != starting_nid); + + return 0; +} +EXPORT_SYMBOL(uncached_alloc_page); + + +/* + * uncached_free_page + * + * @uc_addr: uncached address of first page to free + * @n_pages: number of contiguous pages to free + * + * Free the specified number of uncached pages. + */ +void uncached_free_page(unsigned long uc_addr, int n_pages) +{ + int nid = paddr_to_nid(uc_addr - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pools[nid].pool; + + if (unlikely(pool == NULL)) + return; + + if ((uc_addr & (0XFUL << 60)) != __IA64_UNCACHED_OFFSET) + panic("uncached_free_page invalid address %lx\n", uc_addr); + + gen_pool_free(pool, uc_addr, n_pages * PAGE_SIZE); +} +EXPORT_SYMBOL(uncached_free_page); + + +/* + * uncached_build_memmap, + * + * @uc_start: uncached starting address of a chunk of uncached memory + * @uc_end: uncached ending address of a chunk of uncached memory + * @arg: ignored, (NULL argument passed in on call to efi_memmap_walk_uc()) + * + * Called at boot time to build a map of pages that can be used for + * memory special operations. + */ +static int __init uncached_build_memmap(u64 uc_start, u64 uc_end, void *arg) +{ + int nid = paddr_to_nid(uc_start - __IA64_UNCACHED_OFFSET); + struct gen_pool *pool = uncached_pools[nid].pool; + size_t size = uc_end - uc_start; + + touch_softlockup_watchdog(); + + if (pool != NULL) { + memset((char *)uc_start, 0, size); + (void) gen_pool_add(pool, uc_start, size, nid); + } + return 0; +} + + +static int __init uncached_init(void) +{ + int nid; + + for_each_online_node(nid) { + uncached_pools[nid].pool = gen_pool_create(PAGE_SHIFT, nid); + mutex_init(&uncached_pools[nid].add_chunk_mutex); + } + + efi_memmap_walk_uc(uncached_build_memmap, NULL); + return 0; +} + +__initcall(uncached_init); diff --git a/arch/ia64/kernel/unwind.c b/arch/ia64/kernel/unwind.c new file mode 100644 index 000000000000..6bd64c35e691 --- /dev/null +++ b/arch/ia64/kernel/unwind.c @@ -0,0 +1,2320 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 1999-2004 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 2003 Fenghua Yu + * - Change pt_regs_off() to make it less dependent on pt_regs structure. + */ +/* + * This file implements call frame unwind support for the Linux + * kernel. Parsing and processing the unwind information is + * time-consuming, so this implementation translates the unwind + * descriptors into unwind scripts. These scripts are very simple + * (basically a sequence of assignments) and efficient to execute. + * They are cached for later re-use. Each script is specific for a + * given instruction pointer address and the set of predicate values + * that the script depends on (most unwind descriptors are + * unconditional and scripts often do not depend on predicates at + * all). This code is based on the unwind conventions described in + * the "IA-64 Software Conventions and Runtime Architecture" manual. + * + * SMP conventions: + * o updates to the global unwind data (in structure "unw") are serialized + * by the unw.lock spinlock + * o each unwind script has its own read-write lock; a thread must acquire + * a read lock before executing a script and must acquire a write lock + * before modifying a script + * o if both the unw.lock spinlock and a script's read-write lock must be + * acquired, then the read-write lock must be acquired first. + */ +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "entry.h" +#include "unwind_i.h" + +#define UNW_LOG_CACHE_SIZE 7 /* each unw_script is ~256 bytes in size */ +#define UNW_CACHE_SIZE (1 << UNW_LOG_CACHE_SIZE) + +#define UNW_LOG_HASH_SIZE (UNW_LOG_CACHE_SIZE + 1) +#define UNW_HASH_SIZE (1 << UNW_LOG_HASH_SIZE) + +#define UNW_STATS 0 /* WARNING: this disabled interrupts for long time-spans!! */ + +#ifdef UNW_DEBUG + static unsigned int unw_debug_level = UNW_DEBUG; +# define UNW_DEBUG_ON(n) unw_debug_level >= n + /* Do not code a printk level, not all debug lines end in newline */ +# define UNW_DPRINT(n, ...) if (UNW_DEBUG_ON(n)) printk(__VA_ARGS__) +# undef inline +# define inline +#else /* !UNW_DEBUG */ +# define UNW_DEBUG_ON(n) 0 +# define UNW_DPRINT(n, ...) +#endif /* UNW_DEBUG */ + +#if UNW_STATS +# define STAT(x...) x +#else +# define STAT(x...) +#endif + +#define alloc_reg_state() kmalloc(sizeof(struct unw_reg_state), GFP_ATOMIC) +#define free_reg_state(usr) kfree(usr) +#define alloc_labeled_state() kmalloc(sizeof(struct unw_labeled_state), GFP_ATOMIC) +#define free_labeled_state(usr) kfree(usr) + +typedef unsigned long unw_word; +typedef unsigned char unw_hash_index_t; + +static struct { + spinlock_t lock; /* spinlock for unwind data */ + + /* list of unwind tables (one per load-module) */ + struct unw_table *tables; + + unsigned long r0; /* constant 0 for r0 */ + + /* table of registers that prologues can save (and order in which they're saved): */ + const unsigned char save_order[8]; + + /* maps a preserved register index (preg_index) to corresponding switch_stack offset: */ + unsigned short sw_off[sizeof(struct unw_frame_info) / 8]; + + unsigned short lru_head; /* index of lead-recently used script */ + unsigned short lru_tail; /* index of most-recently used script */ + + /* index into unw_frame_info for preserved register i */ + unsigned short preg_index[UNW_NUM_REGS]; + + short pt_regs_offsets[32]; + + /* unwind table for the kernel: */ + struct unw_table kernel_table; + + /* unwind table describing the gate page (kernel code that is mapped into user space): */ + size_t gate_table_size; + unsigned long *gate_table; + + /* hash table that maps instruction pointer to script index: */ + unsigned short hash[UNW_HASH_SIZE]; + + /* script cache: */ + struct unw_script cache[UNW_CACHE_SIZE]; + +# ifdef UNW_DEBUG + const char *preg_name[UNW_NUM_REGS]; +# endif +# if UNW_STATS + struct { + struct { + int lookups; + int hinted_hits; + int normal_hits; + int collision_chain_traversals; + } cache; + struct { + unsigned long build_time; + unsigned long run_time; + unsigned long parse_time; + int builds; + int news; + int collisions; + int runs; + } script; + struct { + unsigned long init_time; + unsigned long unwind_time; + int inits; + int unwinds; + } api; + } stat; +# endif +} unw = { + .tables = &unw.kernel_table, + .lock = __SPIN_LOCK_UNLOCKED(unw.lock), + .save_order = { + UNW_REG_RP, UNW_REG_PFS, UNW_REG_PSP, UNW_REG_PR, + UNW_REG_UNAT, UNW_REG_LC, UNW_REG_FPSR, UNW_REG_PRI_UNAT_GR + }, + .preg_index = { + offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_GR */ + offsetof(struct unw_frame_info, pri_unat_loc)/8, /* PRI_UNAT_MEM */ + offsetof(struct unw_frame_info, bsp_loc)/8, + offsetof(struct unw_frame_info, bspstore_loc)/8, + offsetof(struct unw_frame_info, pfs_loc)/8, + offsetof(struct unw_frame_info, rnat_loc)/8, + offsetof(struct unw_frame_info, psp)/8, + offsetof(struct unw_frame_info, rp_loc)/8, + offsetof(struct unw_frame_info, r4)/8, + offsetof(struct unw_frame_info, r5)/8, + offsetof(struct unw_frame_info, r6)/8, + offsetof(struct unw_frame_info, r7)/8, + offsetof(struct unw_frame_info, unat_loc)/8, + offsetof(struct unw_frame_info, pr_loc)/8, + offsetof(struct unw_frame_info, lc_loc)/8, + offsetof(struct unw_frame_info, fpsr_loc)/8, + offsetof(struct unw_frame_info, b1_loc)/8, + offsetof(struct unw_frame_info, b2_loc)/8, + offsetof(struct unw_frame_info, b3_loc)/8, + offsetof(struct unw_frame_info, b4_loc)/8, + offsetof(struct unw_frame_info, b5_loc)/8, + offsetof(struct unw_frame_info, f2_loc)/8, + offsetof(struct unw_frame_info, f3_loc)/8, + offsetof(struct unw_frame_info, f4_loc)/8, + offsetof(struct unw_frame_info, f5_loc)/8, + offsetof(struct unw_frame_info, fr_loc[16 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[17 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[18 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[19 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[20 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[21 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[22 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[23 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[24 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[25 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[26 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[27 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[28 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[29 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[30 - 16])/8, + offsetof(struct unw_frame_info, fr_loc[31 - 16])/8, + }, + .pt_regs_offsets = { + [0] = -1, + offsetof(struct pt_regs, r1), + offsetof(struct pt_regs, r2), + offsetof(struct pt_regs, r3), + [4] = -1, [5] = -1, [6] = -1, [7] = -1, + offsetof(struct pt_regs, r8), + offsetof(struct pt_regs, r9), + offsetof(struct pt_regs, r10), + offsetof(struct pt_regs, r11), + offsetof(struct pt_regs, r12), + offsetof(struct pt_regs, r13), + offsetof(struct pt_regs, r14), + offsetof(struct pt_regs, r15), + offsetof(struct pt_regs, r16), + offsetof(struct pt_regs, r17), + offsetof(struct pt_regs, r18), + offsetof(struct pt_regs, r19), + offsetof(struct pt_regs, r20), + offsetof(struct pt_regs, r21), + offsetof(struct pt_regs, r22), + offsetof(struct pt_regs, r23), + offsetof(struct pt_regs, r24), + offsetof(struct pt_regs, r25), + offsetof(struct pt_regs, r26), + offsetof(struct pt_regs, r27), + offsetof(struct pt_regs, r28), + offsetof(struct pt_regs, r29), + offsetof(struct pt_regs, r30), + offsetof(struct pt_regs, r31), + }, + .hash = { [0 ... UNW_HASH_SIZE - 1] = -1 }, +#ifdef UNW_DEBUG + .preg_name = { + "pri_unat_gr", "pri_unat_mem", "bsp", "bspstore", "ar.pfs", "ar.rnat", "psp", "rp", + "r4", "r5", "r6", "r7", + "ar.unat", "pr", "ar.lc", "ar.fpsr", + "b1", "b2", "b3", "b4", "b5", + "f2", "f3", "f4", "f5", + "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", + "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" + } +#endif +}; + +static inline int +read_only (void *addr) +{ + return (unsigned long) ((char *) addr - (char *) &unw.r0) < sizeof(unw.r0); +} + +/* + * Returns offset of rREG in struct pt_regs. + */ +static inline unsigned long +pt_regs_off (unsigned long reg) +{ + short off = -1; + + if (reg < ARRAY_SIZE(unw.pt_regs_offsets)) + off = unw.pt_regs_offsets[reg]; + + if (off < 0) { + UNW_DPRINT(0, "unwind.%s: bad scratch reg r%lu\n", __func__, reg); + off = 0; + } + return (unsigned long) off; +} + +static inline struct pt_regs * +get_scratch_regs (struct unw_frame_info *info) +{ + if (!info->pt) { + /* This should not happen with valid unwind info. */ + UNW_DPRINT(0, "unwind.%s: bad unwind info: resetting info->pt\n", __func__); + if (info->flags & UNW_FLAG_INTERRUPT_FRAME) + info->pt = (unsigned long) ((struct pt_regs *) info->psp - 1); + else + info->pt = info->sp - 16; + } + UNW_DPRINT(3, "unwind.%s: sp 0x%lx pt 0x%lx\n", __func__, info->sp, info->pt); + return (struct pt_regs *) info->pt; +} + +/* Unwind accessors. */ + +int +unw_access_gr (struct unw_frame_info *info, int regnum, unsigned long *val, char *nat, int write) +{ + unsigned long *addr, *nat_addr, nat_mask = 0, dummy_nat; + struct unw_ireg *ireg; + struct pt_regs *pt; + + if ((unsigned) regnum - 1 >= 127) { + if (regnum == 0 && !write) { + *val = 0; /* read r0 always returns 0 */ + *nat = 0; + return 0; + } + UNW_DPRINT(0, "unwind.%s: trying to access non-existent r%u\n", + __func__, regnum); + return -1; + } + + if (regnum < 32) { + if (regnum >= 4 && regnum <= 7) { + /* access a preserved register */ + ireg = &info->r4 + (regnum - 4); + addr = ireg->loc; + if (addr) { + nat_addr = addr + ireg->nat.off; + switch (ireg->nat.type) { + case UNW_NAT_VAL: + /* simulate getf.sig/setf.sig */ + if (write) { + if (*nat) { + /* write NaTVal and be done with it */ + addr[0] = 0; + addr[1] = 0x1fffe; + return 0; + } + addr[1] = 0x1003e; + } else { + if (addr[0] == 0 && addr[1] == 0x1ffe) { + /* return NaT and be done with it */ + *val = 0; + *nat = 1; + return 0; + } + } + fallthrough; + case UNW_NAT_NONE: + dummy_nat = 0; + nat_addr = &dummy_nat; + break; + + case UNW_NAT_MEMSTK: + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + break; + + case UNW_NAT_REGSTK: + nat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) addr < info->regstk.limit + || (unsigned long) addr >= info->regstk.top) + { + UNW_DPRINT(0, "unwind.%s: %p outside of regstk " + "[0x%lx-0x%lx)\n", + __func__, (void *) addr, + info->regstk.limit, + info->regstk.top); + return -1; + } + if ((unsigned long) nat_addr >= info->regstk.top) + nat_addr = &info->sw->ar_rnat; + nat_mask = (1UL << ia64_rse_slot_num(addr)); + break; + } + } else { + addr = &info->sw->r4 + (regnum - 4); + nat_addr = &info->sw->ar_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { + /* access a scratch register */ + pt = get_scratch_regs(info); + addr = (unsigned long *) ((unsigned long)pt + pt_regs_off(regnum)); + if (info->pri_unat_loc) + nat_addr = info->pri_unat_loc; + else + nat_addr = &info->sw->caller_unat; + nat_mask = (1UL << ((long) addr & 0x1f8)/8); + } + } else { + /* access a stacked register */ + addr = ia64_rse_skip_regs((unsigned long *) info->bsp, regnum - 32); + nat_addr = ia64_rse_rnat_addr(addr); + if ((unsigned long) addr < info->regstk.limit + || (unsigned long) addr >= info->regstk.top) + { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to access register outside " + "of rbs\n", __func__); + return -1; + } + if ((unsigned long) nat_addr >= info->regstk.top) + nat_addr = &info->sw->ar_rnat; + nat_mask = (1UL << ia64_rse_slot_num(addr)); + } + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else { + *addr = *val; + if (*nat) + *nat_addr |= nat_mask; + else + *nat_addr &= ~nat_mask; + } + } else { + if ((*nat_addr & nat_mask) == 0) { + *val = *addr; + *nat = 0; + } else { + *val = 0; /* if register is a NaT, *addr may contain kernel data! */ + *nat = 1; + } + } + return 0; +} +EXPORT_SYMBOL(unw_access_gr); + +int +unw_access_br (struct unw_frame_info *info, int regnum, unsigned long *val, int write) +{ + unsigned long *addr; + struct pt_regs *pt; + + switch (regnum) { + /* scratch: */ + case 0: pt = get_scratch_regs(info); addr = &pt->b0; break; + case 6: pt = get_scratch_regs(info); addr = &pt->b6; break; + case 7: pt = get_scratch_regs(info); addr = &pt->b7; break; + + /* preserved: */ + case 1: case 2: case 3: case 4: case 5: + addr = *(&info->b1_loc + (regnum - 1)); + if (!addr) + addr = &info->sw->b1 + (regnum - 1); + break; + + default: + UNW_DPRINT(0, "unwind.%s: trying to access non-existent b%u\n", + __func__, regnum); + return -1; + } + if (write) + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_br); + +int +unw_access_fr (struct unw_frame_info *info, int regnum, struct ia64_fpreg *val, int write) +{ + struct ia64_fpreg *addr = NULL; + struct pt_regs *pt; + + if ((unsigned) (regnum - 2) >= 126) { + UNW_DPRINT(0, "unwind.%s: trying to access non-existent f%u\n", + __func__, regnum); + return -1; + } + + if (regnum <= 5) { + addr = *(&info->f2_loc + (regnum - 2)); + if (!addr) + addr = &info->sw->f2 + (regnum - 2); + } else if (regnum <= 15) { + if (regnum <= 11) { + pt = get_scratch_regs(info); + addr = &pt->f6 + (regnum - 6); + } + else + addr = &info->sw->f12 + (regnum - 12); + } else if (regnum <= 31) { + addr = info->fr_loc[regnum - 16]; + if (!addr) + addr = &info->sw->f16 + (regnum - 16); + } else { + struct task_struct *t = info->task; + + if (write) + ia64_sync_fph(t); + else + ia64_flush_fph(t); + addr = t->thread.fph + (regnum - 32); + } + + if (write) + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_fr); + +int +unw_access_ar (struct unw_frame_info *info, int regnum, unsigned long *val, int write) +{ + unsigned long *addr; + struct pt_regs *pt; + + switch (regnum) { + case UNW_AR_BSP: + addr = info->bsp_loc; + if (!addr) + addr = &info->sw->ar_bspstore; + break; + + case UNW_AR_BSPSTORE: + addr = info->bspstore_loc; + if (!addr) + addr = &info->sw->ar_bspstore; + break; + + case UNW_AR_PFS: + addr = info->pfs_loc; + if (!addr) + addr = &info->sw->ar_pfs; + break; + + case UNW_AR_RNAT: + addr = info->rnat_loc; + if (!addr) + addr = &info->sw->ar_rnat; + break; + + case UNW_AR_UNAT: + addr = info->unat_loc; + if (!addr) + addr = &info->sw->caller_unat; + break; + + case UNW_AR_LC: + addr = info->lc_loc; + if (!addr) + addr = &info->sw->ar_lc; + break; + + case UNW_AR_EC: + if (!info->cfm_loc) + return -1; + if (write) + *info->cfm_loc = + (*info->cfm_loc & ~(0x3fUL << 52)) | ((*val & 0x3f) << 52); + else + *val = (*info->cfm_loc >> 52) & 0x3f; + return 0; + + case UNW_AR_FPSR: + addr = info->fpsr_loc; + if (!addr) + addr = &info->sw->ar_fpsr; + break; + + case UNW_AR_RSC: + pt = get_scratch_regs(info); + addr = &pt->ar_rsc; + break; + + case UNW_AR_CCV: + pt = get_scratch_regs(info); + addr = &pt->ar_ccv; + break; + + case UNW_AR_CSD: + pt = get_scratch_regs(info); + addr = &pt->ar_csd; + break; + + case UNW_AR_SSD: + pt = get_scratch_regs(info); + addr = &pt->ar_ssd; + break; + + default: + UNW_DPRINT(0, "unwind.%s: trying to access non-existent ar%u\n", + __func__, regnum); + return -1; + } + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + } else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_ar); + +int +unw_access_pr (struct unw_frame_info *info, unsigned long *val, int write) +{ + unsigned long *addr; + + addr = info->pr_loc; + if (!addr) + addr = &info->sw->pr; + + if (write) { + if (read_only(addr)) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to write read-only location\n", + __func__); + } else + *addr = *val; + } else + *val = *addr; + return 0; +} +EXPORT_SYMBOL(unw_access_pr); + + +/* Routines to manipulate the state stack. */ + +static inline void +push (struct unw_state_record *sr) +{ + struct unw_reg_state *rs; + + rs = alloc_reg_state(); + if (!rs) { + printk(KERN_ERR "unwind: cannot stack reg state!\n"); + return; + } + memcpy(rs, &sr->curr, sizeof(*rs)); + sr->curr.next = rs; +} + +static void +pop (struct unw_state_record *sr) +{ + struct unw_reg_state *rs = sr->curr.next; + + if (!rs) { + printk(KERN_ERR "unwind: stack underflow!\n"); + return; + } + memcpy(&sr->curr, rs, sizeof(*rs)); + free_reg_state(rs); +} + +/* Make a copy of the state stack. Non-recursive to avoid stack overflows. */ +static struct unw_reg_state * +dup_state_stack (struct unw_reg_state *rs) +{ + struct unw_reg_state *copy, *prev = NULL, *first = NULL; + + while (rs) { + copy = alloc_reg_state(); + if (!copy) { + printk(KERN_ERR "unwind.dup_state_stack: out of memory\n"); + return NULL; + } + memcpy(copy, rs, sizeof(*copy)); + if (first) + prev->next = copy; + else + first = copy; + rs = rs->next; + prev = copy; + } + return first; +} + +/* Free all stacked register states (but not RS itself). */ +static void +free_state_stack (struct unw_reg_state *rs) +{ + struct unw_reg_state *p, *next; + + for (p = rs->next; p != NULL; p = next) { + next = p->next; + free_reg_state(p); + } + rs->next = NULL; +} + +/* Unwind decoder routines */ + +static enum unw_register_index __attribute_const__ +decode_abreg (unsigned char abreg, int memory) +{ + switch (abreg) { + case 0x04 ... 0x07: return UNW_REG_R4 + (abreg - 0x04); + case 0x22 ... 0x25: return UNW_REG_F2 + (abreg - 0x22); + case 0x30 ... 0x3f: return UNW_REG_F16 + (abreg - 0x30); + case 0x41 ... 0x45: return UNW_REG_B1 + (abreg - 0x41); + case 0x60: return UNW_REG_PR; + case 0x61: return UNW_REG_PSP; + case 0x62: return memory ? UNW_REG_PRI_UNAT_MEM : UNW_REG_PRI_UNAT_GR; + case 0x63: return UNW_REG_RP; + case 0x64: return UNW_REG_BSP; + case 0x65: return UNW_REG_BSPSTORE; + case 0x66: return UNW_REG_RNAT; + case 0x67: return UNW_REG_UNAT; + case 0x68: return UNW_REG_FPSR; + case 0x69: return UNW_REG_PFS; + case 0x6a: return UNW_REG_LC; + default: + break; + } + UNW_DPRINT(0, "unwind.%s: bad abreg=0x%x\n", __func__, abreg); + return UNW_REG_LC; +} + +static void +set_reg (struct unw_reg_info *reg, enum unw_where where, int when, unsigned long val) +{ + reg->val = val; + reg->where = where; + if (reg->when == UNW_WHEN_NEVER) + reg->when = when; +} + +static void +alloc_spill_area (unsigned long *offp, unsigned long regsize, + struct unw_reg_info *lo, struct unw_reg_info *hi) +{ + struct unw_reg_info *reg; + + for (reg = hi; reg >= lo; --reg) { + if (reg->where == UNW_WHERE_SPILL_HOME) { + reg->where = UNW_WHERE_PSPREL; + *offp -= regsize; + reg->val = *offp; + } + } +} + +static inline void +spill_next_when (struct unw_reg_info **regp, struct unw_reg_info *lim, unw_word t) +{ + struct unw_reg_info *reg; + + for (reg = *regp; reg <= lim; ++reg) { + if (reg->where == UNW_WHERE_SPILL_HOME) { + reg->when = t; + *regp = reg + 1; + return; + } + } + UNW_DPRINT(0, "unwind.%s: excess spill!\n", __func__); +} + +static inline void +finish_prologue (struct unw_state_record *sr) +{ + struct unw_reg_info *reg; + unsigned long off; + int i; + + /* + * First, resolve implicit register save locations (see Section "11.4.2.3 Rules + * for Using Unwind Descriptors", rule 3): + */ + for (i = 0; i < (int) ARRAY_SIZE(unw.save_order); ++i) { + reg = sr->curr.reg + unw.save_order[i]; + if (reg->where == UNW_WHERE_GR_SAVE) { + reg->where = UNW_WHERE_GR; + reg->val = sr->gr_save_loc++; + } + } + + /* + * Next, compute when the fp, general, and branch registers get + * saved. This must come before alloc_spill_area() because + * we need to know which registers are spilled to their home + * locations. + */ + if (sr->imask) { + unsigned char kind, mask = 0, *cp = sr->imask; + int t; + static const unsigned char limit[3] = { + UNW_REG_F31, UNW_REG_R7, UNW_REG_B5 + }; + struct unw_reg_info *(regs[3]); + + regs[0] = sr->curr.reg + UNW_REG_F2; + regs[1] = sr->curr.reg + UNW_REG_R4; + regs[2] = sr->curr.reg + UNW_REG_B1; + + for (t = 0; t < sr->region_len; ++t) { + if ((t & 3) == 0) + mask = *cp++; + kind = (mask >> 2*(3-(t & 3))) & 3; + if (kind > 0) + spill_next_when(®s[kind - 1], sr->curr.reg + limit[kind - 1], + sr->region_start + t); + } + } + /* + * Next, lay out the memory stack spill area: + */ + if (sr->any_spills) { + off = sr->spill_offset; + alloc_spill_area(&off, 16, sr->curr.reg + UNW_REG_F2, sr->curr.reg + UNW_REG_F31); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_B1, sr->curr.reg + UNW_REG_B5); + alloc_spill_area(&off, 8, sr->curr.reg + UNW_REG_R4, sr->curr.reg + UNW_REG_R7); + } +} + +/* + * Region header descriptors. + */ + +static void +desc_prologue (int body, unw_word rlen, unsigned char mask, unsigned char grsave, + struct unw_state_record *sr) +{ + int i, region_start; + + if (!(sr->in_body || sr->first_region)) + finish_prologue(sr); + sr->first_region = 0; + + /* check if we're done: */ + if (sr->when_target < sr->region_start + sr->region_len) { + sr->done = 1; + return; + } + + region_start = sr->region_start + sr->region_len; + + for (i = 0; i < sr->epilogue_count; ++i) + pop(sr); + sr->epilogue_count = 0; + sr->epilogue_start = UNW_WHEN_NEVER; + + sr->region_start = region_start; + sr->region_len = rlen; + sr->in_body = body; + + if (!body) { + push(sr); + + for (i = 0; i < 4; ++i) { + if (mask & 0x8) + set_reg(sr->curr.reg + unw.save_order[i], UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, grsave++); + mask <<= 1; + } + sr->gr_save_loc = grsave; + sr->any_spills = 0; + sr->imask = NULL; + sr->spill_offset = 0x10; /* default to psp+16 */ + } +} + +/* + * Prologue descriptors. + */ + +static inline void +desc_abi (unsigned char abi, unsigned char context, struct unw_state_record *sr) +{ + if (abi == 3 && context == 'i') { + sr->flags |= UNW_FLAG_INTERRUPT_FRAME; + UNW_DPRINT(3, "unwind.%s: interrupt frame\n", __func__); + } + else + UNW_DPRINT(0, "unwind%s: ignoring unwabi(abi=0x%x,context=0x%x)\n", + __func__, abi, context); +} + +static inline void +desc_br_gr (unsigned char brmask, unsigned char gr, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) { + if (brmask & 1) + set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + brmask >>= 1; + } +} + +static inline void +desc_br_mem (unsigned char brmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 5; ++i) { + if (brmask & 1) { + set_reg(sr->curr.reg + UNW_REG_B1 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + brmask >>= 1; + } +} + +static inline void +desc_frgr_mem (unsigned char grmask, unw_word frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } + for (i = 0; i < 20; ++i) { + if ((frmask & 1) != 0) { + int base = (i < 4) ? UNW_REG_F2 : UNW_REG_F16 - 4; + set_reg(sr->curr.reg + base + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_fr_mem (unsigned char frmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((frmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_F2 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + frmask >>= 1; + } +} + +static inline void +desc_gr_gr (unsigned char grmask, unsigned char gr, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_GR, + sr->region_start + sr->region_len - 1, gr++); + grmask >>= 1; + } +} + +static inline void +desc_gr_mem (unsigned char grmask, struct unw_state_record *sr) +{ + int i; + + for (i = 0; i < 4; ++i) { + if ((grmask & 1) != 0) { + set_reg(sr->curr.reg + UNW_REG_R4 + i, UNW_WHERE_SPILL_HOME, + sr->region_start + sr->region_len - 1, 0); + sr->any_spills = 1; + } + grmask >>= 1; + } +} + +static inline void +desc_mem_stack_f (unw_word t, unw_word size, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + UNW_REG_PSP, UNW_WHERE_NONE, + sr->region_start + min_t(int, t, sr->region_len - 1), 16*size); +} + +static inline void +desc_mem_stack_v (unw_word t, struct unw_state_record *sr) +{ + sr->curr.reg[UNW_REG_PSP].when = sr->region_start + min_t(int, t, sr->region_len - 1); +} + +static inline void +desc_reg_gr (unsigned char reg, unsigned char dst, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_GR, sr->region_start + sr->region_len - 1, dst); +} + +static inline void +desc_reg_psprel (unsigned char reg, unw_word pspoff, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_PSPREL, sr->region_start + sr->region_len - 1, + 0x10 - 4*pspoff); +} + +static inline void +desc_reg_sprel (unsigned char reg, unw_word spoff, struct unw_state_record *sr) +{ + set_reg(sr->curr.reg + reg, UNW_WHERE_SPREL, sr->region_start + sr->region_len - 1, + 4*spoff); +} + +static inline void +desc_rp_br (unsigned char dst, struct unw_state_record *sr) +{ + sr->return_link_reg = dst; +} + +static inline void +desc_reg_when (unsigned char regnum, unw_word t, struct unw_state_record *sr) +{ + struct unw_reg_info *reg = sr->curr.reg + regnum; + + if (reg->where == UNW_WHERE_NONE) + reg->where = UNW_WHERE_GR_SAVE; + reg->when = sr->region_start + min_t(int, t, sr->region_len - 1); +} + +static inline void +desc_spill_base (unw_word pspoff, struct unw_state_record *sr) +{ + sr->spill_offset = 0x10 - 4*pspoff; +} + +static inline unsigned char * +desc_spill_mask (unsigned char *imaskp, struct unw_state_record *sr) +{ + sr->imask = imaskp; + return imaskp + (2*sr->region_len + 7)/8; +} + +/* + * Body descriptors. + */ +static inline void +desc_epilogue (unw_word t, unw_word ecount, struct unw_state_record *sr) +{ + sr->epilogue_start = sr->region_start + sr->region_len - 1 - t; + sr->epilogue_count = ecount + 1; +} + +static inline void +desc_copy_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_labeled_state *ls; + + for (ls = sr->labeled_states; ls; ls = ls->next) { + if (ls->label == label) { + free_state_stack(&sr->curr); + memcpy(&sr->curr, &ls->saved_state, sizeof(sr->curr)); + sr->curr.next = dup_state_stack(ls->saved_state.next); + return; + } + } + printk(KERN_ERR "unwind: failed to find state labeled 0x%lx\n", label); +} + +static inline void +desc_label_state (unw_word label, struct unw_state_record *sr) +{ + struct unw_labeled_state *ls; + + ls = alloc_labeled_state(); + if (!ls) { + printk(KERN_ERR "unwind.desc_label_state(): out of memory\n"); + return; + } + ls->label = label; + memcpy(&ls->saved_state, &sr->curr, sizeof(ls->saved_state)); + ls->saved_state.next = dup_state_stack(sr->curr.next); + + /* insert into list of labeled states: */ + ls->next = sr->labeled_states; + sr->labeled_states = ls; +} + +/* + * General descriptors. + */ + +static inline int +desc_is_active (unsigned char qp, unw_word t, struct unw_state_record *sr) +{ + if (sr->when_target <= sr->region_start + min_t(int, t, sr->region_len - 1)) + return 0; + if (qp > 0) { + if ((sr->pr_val & (1UL << qp)) == 0) + return 0; + sr->pr_mask |= (1UL << qp); + } + return 1; +} + +static inline void +desc_restore_p (unsigned char qp, unw_word t, unsigned char abreg, struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 0); + r->where = UNW_WHERE_NONE; + r->when = UNW_WHEN_NEVER; + r->val = 0; +} + +static inline void +desc_spill_reg_p (unsigned char qp, unw_word t, unsigned char abreg, unsigned char x, + unsigned char ytreg, struct unw_state_record *sr) +{ + enum unw_where where = UNW_WHERE_GR; + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + if (x) + where = UNW_WHERE_BR; + else if (ytreg & 0x80) + where = UNW_WHERE_FR; + + r = sr->curr.reg + decode_abreg(abreg, 0); + r->where = where; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = (ytreg & 0x7f); +} + +static inline void +desc_spill_psprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word pspoff, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 1); + r->where = UNW_WHERE_PSPREL; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = 0x10 - 4*pspoff; +} + +static inline void +desc_spill_sprel_p (unsigned char qp, unw_word t, unsigned char abreg, unw_word spoff, + struct unw_state_record *sr) +{ + struct unw_reg_info *r; + + if (!desc_is_active(qp, t, sr)) + return; + + r = sr->curr.reg + decode_abreg(abreg, 1); + r->where = UNW_WHERE_SPREL; + r->when = sr->region_start + min_t(int, t, sr->region_len - 1); + r->val = 4*spoff; +} + +#define UNW_DEC_BAD_CODE(code) printk(KERN_ERR "unwind: unknown code 0x%02x\n", \ + code); + +/* + * region headers: + */ +#define UNW_DEC_PROLOGUE_GR(fmt,r,m,gr,arg) desc_prologue(0,r,m,gr,arg) +#define UNW_DEC_PROLOGUE(fmt,b,r,arg) desc_prologue(b,r,0,32,arg) +/* + * prologue descriptors: + */ +#define UNW_DEC_ABI(fmt,a,c,arg) desc_abi(a,c,arg) +#define UNW_DEC_BR_GR(fmt,b,g,arg) desc_br_gr(b,g,arg) +#define UNW_DEC_BR_MEM(fmt,b,arg) desc_br_mem(b,arg) +#define UNW_DEC_FRGR_MEM(fmt,g,f,arg) desc_frgr_mem(g,f,arg) +#define UNW_DEC_FR_MEM(fmt,f,arg) desc_fr_mem(f,arg) +#define UNW_DEC_GR_GR(fmt,m,g,arg) desc_gr_gr(m,g,arg) +#define UNW_DEC_GR_MEM(fmt,m,arg) desc_gr_mem(m,arg) +#define UNW_DEC_MEM_STACK_F(fmt,t,s,arg) desc_mem_stack_f(t,s,arg) +#define UNW_DEC_MEM_STACK_V(fmt,t,arg) desc_mem_stack_v(t,arg) +#define UNW_DEC_REG_GR(fmt,r,d,arg) desc_reg_gr(r,d,arg) +#define UNW_DEC_REG_PSPREL(fmt,r,o,arg) desc_reg_psprel(r,o,arg) +#define UNW_DEC_REG_SPREL(fmt,r,o,arg) desc_reg_sprel(r,o,arg) +#define UNW_DEC_REG_WHEN(fmt,r,t,arg) desc_reg_when(r,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_GR,t,arg) +#define UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) desc_reg_when(UNW_REG_PRI_UNAT_MEM,t,arg) +#define UNW_DEC_PRIUNAT_GR(fmt,r,arg) desc_reg_gr(UNW_REG_PRI_UNAT_GR,r,arg) +#define UNW_DEC_PRIUNAT_PSPREL(fmt,o,arg) desc_reg_psprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_PRIUNAT_SPREL(fmt,o,arg) desc_reg_sprel(UNW_REG_PRI_UNAT_MEM,o,arg) +#define UNW_DEC_RP_BR(fmt,d,arg) desc_rp_br(d,arg) +#define UNW_DEC_SPILL_BASE(fmt,o,arg) desc_spill_base(o,arg) +#define UNW_DEC_SPILL_MASK(fmt,m,arg) (m = desc_spill_mask(m,arg)) +/* + * body descriptors: + */ +#define UNW_DEC_EPILOGUE(fmt,t,c,arg) desc_epilogue(t,c,arg) +#define UNW_DEC_COPY_STATE(fmt,l,arg) desc_copy_state(l,arg) +#define UNW_DEC_LABEL_STATE(fmt,l,arg) desc_label_state(l,arg) +/* + * general unwind descriptors: + */ +#define UNW_DEC_SPILL_REG_P(f,p,t,a,x,y,arg) desc_spill_reg_p(p,t,a,x,y,arg) +#define UNW_DEC_SPILL_REG(f,t,a,x,y,arg) desc_spill_reg_p(0,t,a,x,y,arg) +#define UNW_DEC_SPILL_PSPREL_P(f,p,t,a,o,arg) desc_spill_psprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_PSPREL(f,t,a,o,arg) desc_spill_psprel_p(0,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL_P(f,p,t,a,o,arg) desc_spill_sprel_p(p,t,a,o,arg) +#define UNW_DEC_SPILL_SPREL(f,t,a,o,arg) desc_spill_sprel_p(0,t,a,o,arg) +#define UNW_DEC_RESTORE_P(f,p,t,a,arg) desc_restore_p(p,t,a,arg) +#define UNW_DEC_RESTORE(f,t,a,arg) desc_restore_p(0,t,a,arg) + +#include "unwind_decoder.c" + + +/* Unwind scripts. */ + +static inline unw_hash_index_t +hash (unsigned long ip) +{ + /* magic number = ((sqrt(5)-1)/2)*2^64 */ + static const unsigned long hashmagic = 0x9e3779b97f4a7c16UL; + + return (ip >> 4) * hashmagic >> (64 - UNW_LOG_HASH_SIZE); +} + +static inline long +cache_match (struct unw_script *script, unsigned long ip, unsigned long pr) +{ + read_lock(&script->lock); + if (ip == script->ip && ((pr ^ script->pr_val) & script->pr_mask) == 0) + /* keep the read lock... */ + return 1; + read_unlock(&script->lock); + return 0; +} + +static inline struct unw_script * +script_lookup (struct unw_frame_info *info) +{ + struct unw_script *script = unw.cache + info->hint; + unsigned short index; + unsigned long ip, pr; + + if (UNW_DEBUG_ON(0)) + return NULL; /* Always regenerate scripts in debug mode */ + + STAT(++unw.stat.cache.lookups); + + ip = info->ip; + pr = info->pr; + + if (cache_match(script, ip, pr)) { + STAT(++unw.stat.cache.hinted_hits); + return script; + } + + index = unw.hash[hash(ip)]; + if (index >= UNW_CACHE_SIZE) + return NULL; + + script = unw.cache + index; + while (1) { + if (cache_match(script, ip, pr)) { + /* update hint; no locking required as single-word writes are atomic */ + STAT(++unw.stat.cache.normal_hits); + unw.cache[info->prev_script].hint = script - unw.cache; + return script; + } + if (script->coll_chain >= UNW_HASH_SIZE) + return NULL; + script = unw.cache + script->coll_chain; + STAT(++unw.stat.cache.collision_chain_traversals); + } +} + +/* + * On returning, a write lock for the SCRIPT is still being held. + */ +static inline struct unw_script * +script_new (unsigned long ip) +{ + struct unw_script *script, *prev, *tmp; + unw_hash_index_t index; + unsigned short head; + + STAT(++unw.stat.script.news); + + /* + * Can't (easily) use cmpxchg() here because of ABA problem + * that is intrinsic in cmpxchg()... + */ + head = unw.lru_head; + script = unw.cache + head; + unw.lru_head = script->lru_chain; + + /* + * We'd deadlock here if we interrupted a thread that is holding a read lock on + * script->lock. Thus, if the write_trylock() fails, we simply bail out. The + * alternative would be to disable interrupts whenever we hold a read-lock, but + * that seems silly. + */ + if (!write_trylock(&script->lock)) + return NULL; + + /* re-insert script at the tail of the LRU chain: */ + unw.cache[unw.lru_tail].lru_chain = head; + unw.lru_tail = head; + + /* remove the old script from the hash table (if it's there): */ + if (script->ip) { + index = hash(script->ip); + tmp = unw.cache + unw.hash[index]; + prev = NULL; + while (1) { + if (tmp == script) { + if (prev) + prev->coll_chain = tmp->coll_chain; + else + unw.hash[index] = tmp->coll_chain; + break; + } else + prev = tmp; + if (tmp->coll_chain >= UNW_CACHE_SIZE) + /* old script wasn't in the hash-table */ + break; + tmp = unw.cache + tmp->coll_chain; + } + } + + /* enter new script in the hash table */ + index = hash(ip); + script->coll_chain = unw.hash[index]; + unw.hash[index] = script - unw.cache; + + script->ip = ip; /* set new IP while we're holding the locks */ + + STAT(if (script->coll_chain < UNW_CACHE_SIZE) ++unw.stat.script.collisions); + + script->flags = 0; + script->hint = 0; + script->count = 0; + return script; +} + +static void +script_finalize (struct unw_script *script, struct unw_state_record *sr) +{ + script->pr_mask = sr->pr_mask; + script->pr_val = sr->pr_val; + /* + * We could down-grade our write-lock on script->lock here but + * the rwlock API doesn't offer atomic lock downgrading, so + * we'll just keep the write-lock and release it later when + * we're done using the script. + */ +} + +static inline void +script_emit (struct unw_script *script, struct unw_insn insn) +{ + if (script->count >= UNW_MAX_SCRIPT_LEN) { + UNW_DPRINT(0, "unwind.%s: script exceeds maximum size of %u instructions!\n", + __func__, UNW_MAX_SCRIPT_LEN); + return; + } + script->insn[script->count++] = insn; +} + +static inline void +emit_nat_info (struct unw_state_record *sr, int i, struct unw_script *script) +{ + struct unw_reg_info *r = sr->curr.reg + i; + enum unw_insn_opcode opc; + struct unw_insn insn; + unsigned long val = 0; + + switch (r->where) { + case UNW_WHERE_GR: + if (r->val >= 32) { + /* register got spilled to a stacked register */ + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_REGSTK; + } else + /* register got spilled to a scratch register */ + opc = UNW_INSN_SETNAT_MEMSTK; + break; + + case UNW_WHERE_FR: + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_VAL; + break; + + case UNW_WHERE_BR: + opc = UNW_INSN_SETNAT_TYPE; + val = UNW_NAT_NONE; + break; + + case UNW_WHERE_PSPREL: + case UNW_WHERE_SPREL: + opc = UNW_INSN_SETNAT_MEMSTK; + break; + + default: + UNW_DPRINT(0, "unwind.%s: don't know how to emit nat info for where = %u\n", + __func__, r->where); + return; + } + insn.opc = opc; + insn.dst = unw.preg_index[i]; + insn.val = val; + script_emit(script, insn); +} + +static void +compile_reg (struct unw_state_record *sr, int i, struct unw_script *script) +{ + struct unw_reg_info *r = sr->curr.reg + i; + enum unw_insn_opcode opc; + unsigned long val, rval; + struct unw_insn insn; + long need_nat_info; + + if (r->where == UNW_WHERE_NONE || r->when >= sr->when_target) + return; + + opc = UNW_INSN_MOVE; + val = rval = r->val; + need_nat_info = (i >= UNW_REG_R4 && i <= UNW_REG_R7); + + switch (r->where) { + case UNW_WHERE_GR: + if (rval >= 32) { + opc = UNW_INSN_MOVE_STACKED; + val = rval - 32; + } else if (rval >= 4 && rval <= 7) { + if (need_nat_info) { + opc = UNW_INSN_MOVE2; + need_nat_info = 0; + } + val = unw.preg_index[UNW_REG_R4 + (rval - 4)]; + } else if (rval == 0) { + opc = UNW_INSN_MOVE_CONST; + val = 0; + } else { + /* register got spilled to a scratch register */ + opc = UNW_INSN_MOVE_SCRATCH; + val = pt_regs_off(rval); + } + break; + + case UNW_WHERE_FR: + if (rval <= 5) + val = unw.preg_index[UNW_REG_F2 + (rval - 2)]; + else if (rval >= 16 && rval <= 31) + val = unw.preg_index[UNW_REG_F16 + (rval - 16)]; + else { + opc = UNW_INSN_MOVE_SCRATCH; + if (rval <= 11) + val = offsetof(struct pt_regs, f6) + 16*(rval - 6); + else + UNW_DPRINT(0, "unwind.%s: kernel may not touch f%lu\n", + __func__, rval); + } + break; + + case UNW_WHERE_BR: + if (rval >= 1 && rval <= 5) + val = unw.preg_index[UNW_REG_B1 + (rval - 1)]; + else { + opc = UNW_INSN_MOVE_SCRATCH; + if (rval == 0) + val = offsetof(struct pt_regs, b0); + else if (rval == 6) + val = offsetof(struct pt_regs, b6); + else + val = offsetof(struct pt_regs, b7); + } + break; + + case UNW_WHERE_SPREL: + opc = UNW_INSN_ADD_SP; + break; + + case UNW_WHERE_PSPREL: + opc = UNW_INSN_ADD_PSP; + break; + + default: + UNW_DPRINT(0, "unwind%s: register %u has unexpected `where' value of %u\n", + __func__, i, r->where); + break; + } + insn.opc = opc; + insn.dst = unw.preg_index[i]; + insn.val = val; + script_emit(script, insn); + if (need_nat_info) + emit_nat_info(sr, i, script); + + if (i == UNW_REG_PSP) { + /* + * info->psp must contain the _value_ of the previous + * sp, not it's save location. We get this by + * dereferencing the value we just stored in + * info->psp: + */ + insn.opc = UNW_INSN_LOAD; + insn.dst = insn.val = unw.preg_index[UNW_REG_PSP]; + script_emit(script, insn); + } +} + +static inline const struct unw_table_entry * +lookup (struct unw_table *table, unsigned long rel_ip) +{ + const struct unw_table_entry *e = NULL; + unsigned long lo, hi, mid; + + /* do a binary search for right entry: */ + for (lo = 0, hi = table->length; lo < hi; ) { + mid = (lo + hi) / 2; + e = &table->array[mid]; + if (rel_ip < e->start_offset) + hi = mid; + else if (rel_ip >= e->end_offset) + lo = mid + 1; + else + break; + } + if (rel_ip < e->start_offset || rel_ip >= e->end_offset) + return NULL; + return e; +} + +/* + * Build an unwind script that unwinds from state OLD_STATE to the + * entrypoint of the function that called OLD_STATE. + */ +static inline struct unw_script * +build_script (struct unw_frame_info *info) +{ + const struct unw_table_entry *e = NULL; + struct unw_script *script = NULL; + struct unw_labeled_state *ls, *next; + unsigned long ip = info->ip; + struct unw_state_record sr; + struct unw_table *table, *prev; + struct unw_reg_info *r; + struct unw_insn insn; + u8 *dp, *desc_end; + u64 hdr; + int i; + STAT(unsigned long start, parse_start;) + + STAT(++unw.stat.script.builds; start = ia64_get_itc()); + + /* build state record */ + memset(&sr, 0, sizeof(sr)); + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) + r->when = UNW_WHEN_NEVER; + sr.pr_val = info->pr; + + UNW_DPRINT(3, "unwind.%s: ip 0x%lx\n", __func__, ip); + script = script_new(ip); + if (!script) { + UNW_DPRINT(0, "unwind.%s: failed to create unwind script\n", __func__); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return NULL; + } + unw.cache[info->prev_script].hint = script - unw.cache; + + /* search the kernels and the modules' unwind tables for IP: */ + + STAT(parse_start = ia64_get_itc()); + + prev = NULL; + for (table = unw.tables; table; table = table->next) { + if (ip >= table->start && ip < table->end) { + /* + * Leave the kernel unwind table at the very front, + * lest moving it breaks some assumption elsewhere. + * Otherwise, move the matching table to the second + * position in the list so that traversals can benefit + * from commonality in backtrace paths. + */ + if (prev && prev != unw.tables) { + /* unw is safe - we're already spinlocked */ + prev->next = table->next; + table->next = unw.tables->next; + unw.tables->next = table; + } + e = lookup(table, ip - table->segment_base); + break; + } + prev = table; + } + if (!e) { + /* no info, return default unwinder (leaf proc, no mem stack, no saved regs) */ + UNW_DPRINT(1, "unwind.%s: no unwind info for ip=0x%lx (prev ip=0x%lx)\n", + __func__, ip, unw.cache[info->prev_script].ip); + sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; + sr.curr.reg[UNW_REG_RP].when = -1; + sr.curr.reg[UNW_REG_RP].val = 0; + compile_reg(&sr, UNW_REG_RP, script); + script_finalize(script, &sr); + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return script; + } + + sr.when_target = (3*((ip & ~0xfUL) - (table->segment_base + e->start_offset))/16 + + (ip & 0xfUL)); + hdr = *(u64 *) (table->segment_base + e->info_offset); + dp = (u8 *) (table->segment_base + e->info_offset + 8); + desc_end = dp + 8*UNW_LENGTH(hdr); + + while (!sr.done && dp < desc_end) + dp = unw_decode(dp, sr.in_body, &sr); + + if (sr.when_target > sr.epilogue_start) { + /* + * sp has been restored and all values on the memory stack below + * psp also have been restored. + */ + sr.curr.reg[UNW_REG_PSP].val = 0; + sr.curr.reg[UNW_REG_PSP].where = UNW_WHERE_NONE; + sr.curr.reg[UNW_REG_PSP].when = UNW_WHEN_NEVER; + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) + if ((r->where == UNW_WHERE_PSPREL && r->val <= 0x10) + || r->where == UNW_WHERE_SPREL) + { + r->val = 0; + r->where = UNW_WHERE_NONE; + r->when = UNW_WHEN_NEVER; + } + } + + script->flags = sr.flags; + + /* + * If RP did't get saved, generate entry for the return link + * register. + */ + if (sr.curr.reg[UNW_REG_RP].when >= sr.when_target) { + sr.curr.reg[UNW_REG_RP].where = UNW_WHERE_BR; + sr.curr.reg[UNW_REG_RP].when = -1; + sr.curr.reg[UNW_REG_RP].val = sr.return_link_reg; + UNW_DPRINT(1, "unwind.%s: using default for rp at ip=0x%lx where=%d val=0x%lx\n", + __func__, ip, sr.curr.reg[UNW_REG_RP].where, + sr.curr.reg[UNW_REG_RP].val); + } + +#ifdef UNW_DEBUG + UNW_DPRINT(1, "unwind.%s: state record for func 0x%lx, t=%u:\n", + __func__, table->segment_base + e->start_offset, sr.when_target); + for (r = sr.curr.reg; r < sr.curr.reg + UNW_NUM_REGS; ++r) { + if (r->where != UNW_WHERE_NONE || r->when != UNW_WHEN_NEVER) { + UNW_DPRINT(1, " %s <- ", unw.preg_name[r - sr.curr.reg]); + switch (r->where) { + case UNW_WHERE_GR: UNW_DPRINT(1, "r%lu", r->val); break; + case UNW_WHERE_FR: UNW_DPRINT(1, "f%lu", r->val); break; + case UNW_WHERE_BR: UNW_DPRINT(1, "b%lu", r->val); break; + case UNW_WHERE_SPREL: UNW_DPRINT(1, "[sp+0x%lx]", r->val); break; + case UNW_WHERE_PSPREL: UNW_DPRINT(1, "[psp+0x%lx]", r->val); break; + case UNW_WHERE_NONE: + UNW_DPRINT(1, "%s+0x%lx", unw.preg_name[r - sr.curr.reg], r->val); + break; + + default: + UNW_DPRINT(1, "BADWHERE(%d)", r->where); + break; + } + UNW_DPRINT(1, "\t\t%d\n", r->when); + } + } +#endif + + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + + /* translate state record into unwinder instructions: */ + + /* + * First, set psp if we're dealing with a fixed-size frame; + * subsequent instructions may depend on this value. + */ + if (sr.when_target > sr.curr.reg[UNW_REG_PSP].when + && (sr.curr.reg[UNW_REG_PSP].where == UNW_WHERE_NONE) + && sr.curr.reg[UNW_REG_PSP].val != 0) { + /* new psp is sp plus frame size */ + insn.opc = UNW_INSN_ADD; + insn.dst = offsetof(struct unw_frame_info, psp)/8; + insn.val = sr.curr.reg[UNW_REG_PSP].val; /* frame size */ + script_emit(script, insn); + } + + /* determine where the primary UNaT is: */ + if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) + i = UNW_REG_PRI_UNAT_MEM; + else if (sr.when_target < sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when) + i = UNW_REG_PRI_UNAT_GR; + else if (sr.curr.reg[UNW_REG_PRI_UNAT_MEM].when > sr.curr.reg[UNW_REG_PRI_UNAT_GR].when) + i = UNW_REG_PRI_UNAT_MEM; + else + i = UNW_REG_PRI_UNAT_GR; + + compile_reg(&sr, i, script); + + for (i = UNW_REG_BSP; i < UNW_NUM_REGS; ++i) + compile_reg(&sr, i, script); + + /* free labeled register states & stack: */ + + STAT(parse_start = ia64_get_itc()); + for (ls = sr.labeled_states; ls; ls = next) { + next = ls->next; + free_state_stack(&ls->saved_state); + free_labeled_state(ls); + } + free_state_stack(&sr.curr); + STAT(unw.stat.script.parse_time += ia64_get_itc() - parse_start); + + script_finalize(script, &sr); + STAT(unw.stat.script.build_time += ia64_get_itc() - start); + return script; +} + +/* + * Apply the unwinding actions represented by OPS and update SR to + * reflect the state that existed upon entry to the function that this + * unwinder represents. + */ +static inline void +run_script (struct unw_script *script, struct unw_frame_info *state) +{ + struct unw_insn *ip, *limit, next_insn; + unsigned long opc, dst, val, off; + unsigned long *s = (unsigned long *) state; + STAT(unsigned long start;) + + STAT(++unw.stat.script.runs; start = ia64_get_itc()); + state->flags = script->flags; + ip = script->insn; + limit = script->insn + script->count; + next_insn = *ip; + + while (ip++ < limit) { + opc = next_insn.opc; + dst = next_insn.dst; + val = next_insn.val; + next_insn = *ip; + + redo: + switch (opc) { + case UNW_INSN_ADD: + s[dst] += val; + break; + + case UNW_INSN_MOVE2: + if (!s[val]) + goto lazy_init; + s[dst+1] = s[val+1]; + s[dst] = s[val]; + break; + + case UNW_INSN_MOVE: + if (!s[val]) + goto lazy_init; + s[dst] = s[val]; + break; + + case UNW_INSN_MOVE_SCRATCH: + if (state->pt) { + s[dst] = (unsigned long) get_scratch_regs(state) + val; + } else { + s[dst] = 0; + UNW_DPRINT(0, "unwind.%s: no state->pt, dst=%ld, val=%ld\n", + __func__, dst, val); + } + break; + + case UNW_INSN_MOVE_CONST: + if (val == 0) + s[dst] = (unsigned long) &unw.r0; + else { + s[dst] = 0; + UNW_DPRINT(0, "unwind.%s: UNW_INSN_MOVE_CONST bad val=%ld\n", + __func__, val); + } + break; + + + case UNW_INSN_MOVE_STACKED: + s[dst] = (unsigned long) ia64_rse_skip_regs((unsigned long *)state->bsp, + val); + break; + + case UNW_INSN_ADD_PSP: + s[dst] = state->psp + val; + break; + + case UNW_INSN_ADD_SP: + s[dst] = state->sp + val; + break; + + case UNW_INSN_SETNAT_MEMSTK: + if (!state->pri_unat_loc) + state->pri_unat_loc = &state->sw->caller_unat; + /* register off. is a multiple of 8, so the least 3 bits (type) are 0 */ + s[dst+1] = ((unsigned long) state->pri_unat_loc - s[dst]) | UNW_NAT_MEMSTK; + break; + + case UNW_INSN_SETNAT_TYPE: + s[dst+1] = val; + break; + + case UNW_INSN_LOAD: +#ifdef UNW_DEBUG + if ((s[val] & (local_cpu_data->unimpl_va_mask | 0x7)) != 0 + || s[val] < TASK_SIZE) + { + UNW_DPRINT(0, "unwind.%s: rejecting bad psp=0x%lx\n", + __func__, s[val]); + break; + } +#endif + s[dst] = *(unsigned long *) s[val]; + break; + } + } + STAT(unw.stat.script.run_time += ia64_get_itc() - start); + return; + + lazy_init: + off = unw.sw_off[val]; + s[val] = (unsigned long) state->sw + off; + if (off >= offsetof(struct switch_stack, r4) && off <= offsetof(struct switch_stack, r7)) + /* + * We're initializing a general register: init NaT info, too. Note that + * the offset is a multiple of 8 which gives us the 3 bits needed for + * the type field. + */ + s[val+1] = (offsetof(struct switch_stack, ar_unat) - off) | UNW_NAT_MEMSTK; + goto redo; +} + +static int +find_save_locs (struct unw_frame_info *info) +{ + int have_write_lock = 0; + struct unw_script *scr; + unsigned long flags = 0; + + if ((info->ip & (local_cpu_data->unimpl_va_mask | 0xf)) || info->ip < TASK_SIZE) { + /* don't let obviously bad addresses pollute the cache */ + /* FIXME: should really be level 0 but it occurs too often. KAO */ + UNW_DPRINT(1, "unwind.%s: rejecting bad ip=0x%lx\n", __func__, info->ip); + info->rp_loc = NULL; + return -1; + } + + scr = script_lookup(info); + if (!scr) { + spin_lock_irqsave(&unw.lock, flags); + scr = build_script(info); + if (!scr) { + spin_unlock_irqrestore(&unw.lock, flags); + UNW_DPRINT(0, + "unwind.%s: failed to locate/build unwind script for ip %lx\n", + __func__, info->ip); + return -1; + } + have_write_lock = 1; + } + info->hint = scr->hint; + info->prev_script = scr - unw.cache; + + run_script(scr, info); + + if (have_write_lock) { + write_unlock(&scr->lock); + spin_unlock_irqrestore(&unw.lock, flags); + } else + read_unlock(&scr->lock); + return 0; +} + +static int +unw_valid(const struct unw_frame_info *info, unsigned long* p) +{ + unsigned long loc = (unsigned long)p; + return (loc >= info->regstk.limit && loc < info->regstk.top) || + (loc >= info->memstk.top && loc < info->memstk.limit); +} + +int +unw_unwind (struct unw_frame_info *info) +{ + unsigned long prev_ip, prev_sp, prev_bsp; + unsigned long ip, pr, num_regs; + STAT(unsigned long start, flags;) + int retval; + + STAT(local_irq_save(flags); ++unw.stat.api.unwinds; start = ia64_get_itc()); + + prev_ip = info->ip; + prev_sp = info->sp; + prev_bsp = info->bsp; + + /* validate the return IP pointer */ + if (!unw_valid(info, info->rp_loc)) { + /* FIXME: should really be level 0 but it occurs too often. KAO */ + UNW_DPRINT(1, "unwind.%s: failed to locate return link (ip=0x%lx)!\n", + __func__, info->ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + /* restore the ip */ + ip = info->ip = *info->rp_loc; + if (ip < GATE_ADDR) { + UNW_DPRINT(2, "unwind.%s: reached user-space (ip=0x%lx)\n", __func__, ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* validate the previous stack frame pointer */ + if (!unw_valid(info, info->pfs_loc)) { + UNW_DPRINT(0, "unwind.%s: failed to locate ar.pfs!\n", __func__); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + /* restore the cfm: */ + info->cfm_loc = info->pfs_loc; + + /* restore the bsp: */ + pr = info->pr; + num_regs = 0; + if ((info->flags & UNW_FLAG_INTERRUPT_FRAME)) { + info->pt = info->sp + 16; + if ((pr & (1UL << PRED_NON_SYSCALL)) != 0) + num_regs = *info->cfm_loc & 0x7f; /* size of frame */ + info->pfs_loc = + (unsigned long *) (info->pt + offsetof(struct pt_regs, ar_pfs)); + UNW_DPRINT(3, "unwind.%s: interrupt_frame pt 0x%lx\n", __func__, info->pt); + } else + num_regs = (*info->cfm_loc >> 7) & 0x7f; /* size of locals */ + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->bsp, -num_regs); + if (info->bsp < info->regstk.limit || info->bsp > info->regstk.top) { + UNW_DPRINT(0, "unwind.%s: bsp (0x%lx) out of range [0x%lx-0x%lx]\n", + __func__, info->bsp, info->regstk.limit, info->regstk.top); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* restore the sp: */ + info->sp = info->psp; + if (info->sp < info->memstk.top || info->sp > info->memstk.limit) { + UNW_DPRINT(0, "unwind.%s: sp (0x%lx) out of range [0x%lx-0x%lx]\n", + __func__, info->sp, info->memstk.top, info->memstk.limit); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + if (info->ip == prev_ip && info->sp == prev_sp && info->bsp == prev_bsp) { + UNW_DPRINT(0, "unwind.%s: ip, sp, bsp unchanged; stopping here (ip=0x%lx)\n", + __func__, ip); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return -1; + } + + /* as we unwind, the saved ar.unat becomes the primary unat: */ + info->pri_unat_loc = info->unat_loc; + + /* finally, restore the predicates: */ + unw_get_pr(info, &info->pr); + + retval = find_save_locs(info); + STAT(unw.stat.api.unwind_time += ia64_get_itc() - start; local_irq_restore(flags)); + return retval; +} +EXPORT_SYMBOL(unw_unwind); + +int +unw_unwind_to_user (struct unw_frame_info *info) +{ + unsigned long ip, sp, pr = info->pr; + + do { + unw_get_sp(info, &sp); + if ((long)((unsigned long)info->task + IA64_STK_OFFSET - sp) + < IA64_PT_REGS_SIZE) { + UNW_DPRINT(0, "unwind.%s: ran off the top of the kernel stack\n", + __func__); + break; + } + if (unw_is_intr_frame(info) && + (pr & (1UL << PRED_USER_STACK))) + return 0; + if (unw_get_pr (info, &pr) < 0) { + unw_get_rp(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to read " + "predicate register (ip=0x%lx)\n", + __func__, ip); + return -1; + } + } while (unw_unwind(info) >= 0); + unw_get_ip(info, &ip); + UNW_DPRINT(0, "unwind.%s: failed to unwind to user-level (ip=0x%lx)\n", + __func__, ip); + return -1; +} +EXPORT_SYMBOL(unw_unwind_to_user); + +static void +init_frame_info (struct unw_frame_info *info, struct task_struct *t, + struct switch_stack *sw, unsigned long stktop) +{ + unsigned long rbslimit, rbstop, stklimit; + STAT(unsigned long start, flags;) + + STAT(local_irq_save(flags); ++unw.stat.api.inits; start = ia64_get_itc()); + + /* + * Subtle stuff here: we _could_ unwind through the switch_stack frame but we + * don't want to do that because it would be slow as each preserved register would + * have to be processed. Instead, what we do here is zero out the frame info and + * start the unwind process at the function that created the switch_stack frame. + * When a preserved value in switch_stack needs to be accessed, run_script() will + * initialize the appropriate pointer on demand. + */ + memset(info, 0, sizeof(*info)); + + rbslimit = (unsigned long) t + IA64_RBS_OFFSET; + stklimit = (unsigned long) t + IA64_STK_OFFSET; + + rbstop = sw->ar_bspstore; + if (rbstop > stklimit || rbstop < rbslimit) + rbstop = rbslimit; + + if (stktop <= rbstop) + stktop = rbstop; + if (stktop > stklimit) + stktop = stklimit; + + info->regstk.limit = rbslimit; + info->regstk.top = rbstop; + info->memstk.limit = stklimit; + info->memstk.top = stktop; + info->task = t; + info->sw = sw; + info->sp = info->psp = stktop; + info->pr = sw->pr; + UNW_DPRINT(3, "unwind.%s:\n" + " task 0x%lx\n" + " rbs = [0x%lx-0x%lx)\n" + " stk = [0x%lx-0x%lx)\n" + " pr 0x%lx\n" + " sw 0x%lx\n" + " sp 0x%lx\n", + __func__, (unsigned long) t, rbslimit, rbstop, stktop, stklimit, + info->pr, (unsigned long) info->sw, info->sp); + STAT(unw.stat.api.init_time += ia64_get_itc() - start; local_irq_restore(flags)); +} + +void +unw_init_frame_info (struct unw_frame_info *info, struct task_struct *t, struct switch_stack *sw) +{ + unsigned long sol; + + init_frame_info(info, t, sw, (unsigned long) (sw + 1) - 16); + info->cfm_loc = &sw->ar_pfs; + sol = (*info->cfm_loc >> 7) & 0x7f; + info->bsp = (unsigned long) ia64_rse_skip_regs((unsigned long *) info->regstk.top, -sol); + info->ip = sw->b0; + UNW_DPRINT(3, "unwind.%s:\n" + " bsp 0x%lx\n" + " sol 0x%lx\n" + " ip 0x%lx\n", + __func__, info->bsp, sol, info->ip); + find_save_locs(info); +} + +EXPORT_SYMBOL(unw_init_frame_info); + +void +unw_init_from_blocked_task (struct unw_frame_info *info, struct task_struct *t) +{ + struct switch_stack *sw = (struct switch_stack *) (t->thread.ksp + 16); + + UNW_DPRINT(1, "unwind.%s\n", __func__); + unw_init_frame_info(info, t, sw); +} +EXPORT_SYMBOL(unw_init_from_blocked_task); + +static void +init_unwind_table (struct unw_table *table, const char *name, unsigned long segment_base, + unsigned long gp, const void *table_start, const void *table_end) +{ + const struct unw_table_entry *start = table_start, *end = table_end; + + table->name = name; + table->segment_base = segment_base; + table->gp = gp; + table->start = segment_base + start[0].start_offset; + table->end = segment_base + end[-1].end_offset; + table->array = start; + table->length = end - start; +} + +void * +unw_add_unwind_table (const char *name, unsigned long segment_base, unsigned long gp, + const void *table_start, const void *table_end) +{ + const struct unw_table_entry *start = table_start, *end = table_end; + struct unw_table *table; + unsigned long flags; + + if (end - start <= 0) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to insert empty unwind table\n", + __func__); + return NULL; + } + + table = kmalloc(sizeof(*table), GFP_USER); + if (!table) + return NULL; + + init_unwind_table(table, name, segment_base, gp, table_start, table_end); + + spin_lock_irqsave(&unw.lock, flags); + { + /* keep kernel unwind table at the front (it's searched most commonly): */ + table->next = unw.tables->next; + unw.tables->next = table; + } + spin_unlock_irqrestore(&unw.lock, flags); + + return table; +} + +void +unw_remove_unwind_table (void *handle) +{ + struct unw_table *table, *prev; + struct unw_script *tmp; + unsigned long flags; + long index; + + if (!handle) { + UNW_DPRINT(0, "unwind.%s: ignoring attempt to remove non-existent unwind table\n", + __func__); + return; + } + + table = handle; + if (table == &unw.kernel_table) { + UNW_DPRINT(0, "unwind.%s: sorry, freeing the kernel's unwind table is a " + "no-can-do!\n", __func__); + return; + } + + spin_lock_irqsave(&unw.lock, flags); + { + /* first, delete the table: */ + + for (prev = (struct unw_table *) &unw.tables; prev; prev = prev->next) + if (prev->next == table) + break; + if (!prev) { + UNW_DPRINT(0, "unwind.%s: failed to find unwind table %p\n", + __func__, (void *) table); + spin_unlock_irqrestore(&unw.lock, flags); + return; + } + prev->next = table->next; + } + spin_unlock_irqrestore(&unw.lock, flags); + + /* next, remove hash table entries for this table */ + + for (index = 0; index < UNW_HASH_SIZE; ++index) { + tmp = unw.cache + unw.hash[index]; + if (unw.hash[index] >= UNW_CACHE_SIZE + || tmp->ip < table->start || tmp->ip >= table->end) + continue; + + write_lock(&tmp->lock); + { + if (tmp->ip >= table->start && tmp->ip < table->end) { + unw.hash[index] = tmp->coll_chain; + tmp->ip = 0; + } + } + write_unlock(&tmp->lock); + } + + kfree(table); +} + +static int __init +create_gate_table (void) +{ + const struct unw_table_entry *entry, *start, *end; + unsigned long *lp, segbase = GATE_ADDR; + size_t info_size, size; + char *info; + Elf64_Phdr *punw = NULL, *phdr = (Elf64_Phdr *) (GATE_ADDR + GATE_EHDR->e_phoff); + int i; + + for (i = 0; i < GATE_EHDR->e_phnum; ++i, ++phdr) + if (phdr->p_type == PT_IA_64_UNWIND) { + punw = phdr; + break; + } + + if (!punw) { + printk("%s: failed to find gate DSO's unwind table!\n", __func__); + return 0; + } + + start = (const struct unw_table_entry *) punw->p_vaddr; + end = (struct unw_table_entry *) ((char *) start + punw->p_memsz); + size = 0; + + unw_add_unwind_table("linux-gate.so", segbase, 0, start, end); + + for (entry = start; entry < end; ++entry) + size += 3*8 + 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + size += 8; /* reserve space for "end of table" marker */ + + unw.gate_table = kmalloc(size, GFP_KERNEL); + if (!unw.gate_table) { + unw.gate_table_size = 0; + printk(KERN_ERR "%s: unable to create unwind data for gate page!\n", __func__); + return 0; + } + unw.gate_table_size = size; + + lp = unw.gate_table; + info = (char *) unw.gate_table + size; + + for (entry = start; entry < end; ++entry, lp += 3) { + info_size = 8 + 8*UNW_LENGTH(*(u64 *) (segbase + entry->info_offset)); + info -= info_size; + memcpy(info, (char *) segbase + entry->info_offset, info_size); + + lp[0] = segbase + entry->start_offset; /* start */ + lp[1] = segbase + entry->end_offset; /* end */ + lp[2] = info - (char *) unw.gate_table; /* info */ + } + *lp = 0; /* end-of-table marker */ + return 0; +} + +__initcall(create_gate_table); + +void __init +unw_init (void) +{ + extern char __gp[]; + extern void unw_hash_index_t_is_too_narrow (void); + long i, off; + + if (8*sizeof(unw_hash_index_t) < UNW_LOG_HASH_SIZE) + unw_hash_index_t_is_too_narrow(); + + unw.sw_off[unw.preg_index[UNW_REG_PRI_UNAT_GR]] = SW(CALLER_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_BSPSTORE]] = SW(AR_BSPSTORE); + unw.sw_off[unw.preg_index[UNW_REG_PFS]] = SW(AR_PFS); + unw.sw_off[unw.preg_index[UNW_REG_RP]] = SW(B0); + unw.sw_off[unw.preg_index[UNW_REG_UNAT]] = SW(CALLER_UNAT); + unw.sw_off[unw.preg_index[UNW_REG_PR]] = SW(PR); + unw.sw_off[unw.preg_index[UNW_REG_LC]] = SW(AR_LC); + unw.sw_off[unw.preg_index[UNW_REG_FPSR]] = SW(AR_FPSR); + for (i = UNW_REG_R4, off = SW(R4); i <= UNW_REG_R7; ++i, off += 8) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_B1, off = SW(B1); i <= UNW_REG_B5; ++i, off += 8) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_F2, off = SW(F2); i <= UNW_REG_F5; ++i, off += 16) + unw.sw_off[unw.preg_index[i]] = off; + for (i = UNW_REG_F16, off = SW(F16); i <= UNW_REG_F31; ++i, off += 16) + unw.sw_off[unw.preg_index[i]] = off; + + for (i = 0; i < UNW_CACHE_SIZE; ++i) { + if (i > 0) + unw.cache[i].lru_chain = (i - 1); + unw.cache[i].coll_chain = -1; + rwlock_init(&unw.cache[i].lock); + } + unw.lru_head = UNW_CACHE_SIZE - 1; + unw.lru_tail = 0; + + init_unwind_table(&unw.kernel_table, "kernel", KERNEL_START, (unsigned long) __gp, + __start_unwind, __end_unwind); +} + +/* + * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED + * + * This system call has been deprecated. The new and improved way to get + * at the kernel's unwind info is via the gate DSO. The address of the + * ELF header for this DSO is passed to user-level via AT_SYSINFO_EHDR. + * + * DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED DEPRECATED + * + * This system call copies the unwind data into the buffer pointed to by BUF and returns + * the size of the unwind data. If BUF_SIZE is smaller than the size of the unwind data + * or if BUF is NULL, nothing is copied, but the system call still returns the size of the + * unwind data. + * + * The first portion of the unwind data contains an unwind table and rest contains the + * associated unwind info (in no particular order). The unwind table consists of a table + * of entries of the form: + * + * u64 start; (64-bit address of start of function) + * u64 end; (64-bit address of start of function) + * u64 info; (BUF-relative offset to unwind info) + * + * The end of the unwind table is indicated by an entry with a START address of zero. + * + * Please see the IA-64 Software Conventions and Runtime Architecture manual for details + * on the format of the unwind info. + * + * ERRORS + * EFAULT BUF points outside your accessible address space. + */ +asmlinkage long +sys_getunwind (void __user *buf, size_t buf_size) +{ + if (buf && buf_size >= unw.gate_table_size) + if (copy_to_user(buf, unw.gate_table, unw.gate_table_size) != 0) + return -EFAULT; + return unw.gate_table_size; +} diff --git a/arch/ia64/kernel/unwind_decoder.c b/arch/ia64/kernel/unwind_decoder.c new file mode 100644 index 000000000000..83f54f7929b5 --- /dev/null +++ b/arch/ia64/kernel/unwind_decoder.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang + * + * Generic IA-64 unwind info decoder. + * + * This file is used both by the Linux kernel and objdump. Please keep + * the two copies of this file in sync. + * + * You need to customize the decoder by defining the following + * macros/constants before including this file: + * + * Types: + * unw_word Unsigned integer type with at least 64 bits + * + * Register names: + * UNW_REG_BSP + * UNW_REG_BSPSTORE + * UNW_REG_FPSR + * UNW_REG_LC + * UNW_REG_PFS + * UNW_REG_PR + * UNW_REG_RNAT + * UNW_REG_PSP + * UNW_REG_RP + * UNW_REG_UNAT + * + * Decoder action macros: + * UNW_DEC_BAD_CODE(code) + * UNW_DEC_ABI(fmt,abi,context,arg) + * UNW_DEC_BR_GR(fmt,brmask,gr,arg) + * UNW_DEC_BR_MEM(fmt,brmask,arg) + * UNW_DEC_COPY_STATE(fmt,label,arg) + * UNW_DEC_EPILOGUE(fmt,t,ecount,arg) + * UNW_DEC_FRGR_MEM(fmt,grmask,frmask,arg) + * UNW_DEC_FR_MEM(fmt,frmask,arg) + * UNW_DEC_GR_GR(fmt,grmask,gr,arg) + * UNW_DEC_GR_MEM(fmt,grmask,arg) + * UNW_DEC_LABEL_STATE(fmt,label,arg) + * UNW_DEC_MEM_STACK_F(fmt,t,size,arg) + * UNW_DEC_MEM_STACK_V(fmt,t,arg) + * UNW_DEC_PRIUNAT_GR(fmt,r,arg) + * UNW_DEC_PRIUNAT_WHEN_GR(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_MEM(fmt,t,arg) + * UNW_DEC_PRIUNAT_WHEN_PSPREL(fmt,pspoff,arg) + * UNW_DEC_PRIUNAT_WHEN_SPREL(fmt,spoff,arg) + * UNW_DEC_PROLOGUE(fmt,body,rlen,arg) + * UNW_DEC_PROLOGUE_GR(fmt,rlen,mask,grsave,arg) + * UNW_DEC_REG_PSPREL(fmt,reg,pspoff,arg) + * UNW_DEC_REG_REG(fmt,src,dst,arg) + * UNW_DEC_REG_SPREL(fmt,reg,spoff,arg) + * UNW_DEC_REG_WHEN(fmt,reg,t,arg) + * UNW_DEC_RESTORE(fmt,t,abreg,arg) + * UNW_DEC_RESTORE_P(fmt,qp,t,abreg,arg) + * UNW_DEC_SPILL_BASE(fmt,pspoff,arg) + * UNW_DEC_SPILL_MASK(fmt,imaskp,arg) + * UNW_DEC_SPILL_PSPREL(fmt,t,abreg,pspoff,arg) + * UNW_DEC_SPILL_PSPREL_P(fmt,qp,t,abreg,pspoff,arg) + * UNW_DEC_SPILL_REG(fmt,t,abreg,x,ytreg,arg) + * UNW_DEC_SPILL_REG_P(fmt,qp,t,abreg,x,ytreg,arg) + * UNW_DEC_SPILL_SPREL(fmt,t,abreg,spoff,arg) + * UNW_DEC_SPILL_SPREL_P(fmt,qp,t,abreg,pspoff,arg) + */ + +static unw_word +unw_decode_uleb128 (unsigned char **dpp) +{ + unsigned shift = 0; + unw_word byte, result = 0; + unsigned char *bp = *dpp; + + while (1) + { + byte = *bp++; + result |= (byte & 0x7f) << shift; + if ((byte & 0x80) == 0) + break; + shift += 7; + } + *dpp = bp; + return result; +} + +static unsigned char * +unw_decode_x1 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, abreg; + unw_word t, off; + + byte1 = *dp++; + t = unw_decode_uleb128 (&dp); + off = unw_decode_uleb128 (&dp); + abreg = (byte1 & 0x7f); + if (byte1 & 0x80) + UNW_DEC_SPILL_SPREL(X1, t, abreg, off, arg); + else + UNW_DEC_SPILL_PSPREL(X1, t, abreg, off, arg); + return dp; +} + +static unsigned char * +unw_decode_x2 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, abreg, x, ytreg; + unw_word t; + + byte1 = *dp++; byte2 = *dp++; + t = unw_decode_uleb128 (&dp); + abreg = (byte1 & 0x7f); + ytreg = byte2; + x = (byte1 >> 7) & 1; + if ((byte1 & 0x80) == 0 && ytreg == 0) + UNW_DEC_RESTORE(X2, t, abreg, arg); + else + UNW_DEC_SPILL_REG(X2, t, abreg, x, ytreg, arg); + return dp; +} + +static unsigned char * +unw_decode_x3 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, abreg, qp; + unw_word t, off; + + byte1 = *dp++; byte2 = *dp++; + t = unw_decode_uleb128 (&dp); + off = unw_decode_uleb128 (&dp); + + qp = (byte1 & 0x3f); + abreg = (byte2 & 0x7f); + + if (byte1 & 0x80) + UNW_DEC_SPILL_SPREL_P(X3, qp, t, abreg, off, arg); + else + UNW_DEC_SPILL_PSPREL_P(X3, qp, t, abreg, off, arg); + return dp; +} + +static unsigned char * +unw_decode_x4 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, byte2, byte3, qp, abreg, x, ytreg; + unw_word t; + + byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; + t = unw_decode_uleb128 (&dp); + + qp = (byte1 & 0x3f); + abreg = (byte2 & 0x7f); + x = (byte2 >> 7) & 1; + ytreg = byte3; + + if ((byte2 & 0x80) == 0 && byte3 == 0) + UNW_DEC_RESTORE_P(X4, qp, t, abreg, arg); + else + UNW_DEC_SPILL_REG_P(X4, qp, t, abreg, x, ytreg, arg); + return dp; +} + +static unsigned char * +unw_decode_r1 (unsigned char *dp, unsigned char code, void *arg) +{ + int body = (code & 0x20) != 0; + unw_word rlen; + + rlen = (code & 0x1f); + UNW_DEC_PROLOGUE(R1, body, rlen, arg); + return dp; +} + +static unsigned char * +unw_decode_r2 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char byte1, mask, grsave; + unw_word rlen; + + byte1 = *dp++; + + mask = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); + grsave = (byte1 & 0x7f); + rlen = unw_decode_uleb128 (&dp); + UNW_DEC_PROLOGUE_GR(R2, rlen, mask, grsave, arg); + return dp; +} + +static unsigned char * +unw_decode_r3 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word rlen; + + rlen = unw_decode_uleb128 (&dp); + UNW_DEC_PROLOGUE(R3, ((code & 0x3) == 1), rlen, arg); + return dp; +} + +static unsigned char * +unw_decode_p1 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char brmask = (code & 0x1f); + + UNW_DEC_BR_MEM(P1, brmask, arg); + return dp; +} + +static unsigned char * +unw_decode_p2_p5 (unsigned char *dp, unsigned char code, void *arg) +{ + if ((code & 0x10) == 0) + { + unsigned char byte1 = *dp++; + + UNW_DEC_BR_GR(P2, ((code & 0xf) << 1) | ((byte1 >> 7) & 1), + (byte1 & 0x7f), arg); + } + else if ((code & 0x08) == 0) + { + unsigned char byte1 = *dp++, r, dst; + + r = ((code & 0x7) << 1) | ((byte1 >> 7) & 1); + dst = (byte1 & 0x7f); + switch (r) + { + case 0: UNW_DEC_REG_GR(P3, UNW_REG_PSP, dst, arg); break; + case 1: UNW_DEC_REG_GR(P3, UNW_REG_RP, dst, arg); break; + case 2: UNW_DEC_REG_GR(P3, UNW_REG_PFS, dst, arg); break; + case 3: UNW_DEC_REG_GR(P3, UNW_REG_PR, dst, arg); break; + case 4: UNW_DEC_REG_GR(P3, UNW_REG_UNAT, dst, arg); break; + case 5: UNW_DEC_REG_GR(P3, UNW_REG_LC, dst, arg); break; + case 6: UNW_DEC_RP_BR(P3, dst, arg); break; + case 7: UNW_DEC_REG_GR(P3, UNW_REG_RNAT, dst, arg); break; + case 8: UNW_DEC_REG_GR(P3, UNW_REG_BSP, dst, arg); break; + case 9: UNW_DEC_REG_GR(P3, UNW_REG_BSPSTORE, dst, arg); break; + case 10: UNW_DEC_REG_GR(P3, UNW_REG_FPSR, dst, arg); break; + case 11: UNW_DEC_PRIUNAT_GR(P3, dst, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + else if ((code & 0x7) == 0) + UNW_DEC_SPILL_MASK(P4, dp, arg); + else if ((code & 0x7) == 1) + { + unw_word grmask, frmask, byte1, byte2, byte3; + + byte1 = *dp++; byte2 = *dp++; byte3 = *dp++; + grmask = ((byte1 >> 4) & 0xf); + frmask = ((byte1 & 0xf) << 16) | (byte2 << 8) | byte3; + UNW_DEC_FRGR_MEM(P5, grmask, frmask, arg); + } + else + UNW_DEC_BAD_CODE(code); + return dp; +} + +static unsigned char * +unw_decode_p6 (unsigned char *dp, unsigned char code, void *arg) +{ + int gregs = (code & 0x10) != 0; + unsigned char mask = (code & 0x0f); + + if (gregs) + UNW_DEC_GR_MEM(P6, mask, arg); + else + UNW_DEC_FR_MEM(P6, mask, arg); + return dp; +} + +static unsigned char * +unw_decode_p7_p10 (unsigned char *dp, unsigned char code, void *arg) +{ + unsigned char r, byte1, byte2; + unw_word t, size; + + if ((code & 0x10) == 0) + { + r = (code & 0xf); + t = unw_decode_uleb128 (&dp); + switch (r) + { + case 0: + size = unw_decode_uleb128 (&dp); + UNW_DEC_MEM_STACK_F(P7, t, size, arg); + break; + + case 1: UNW_DEC_MEM_STACK_V(P7, t, arg); break; + case 2: UNW_DEC_SPILL_BASE(P7, t, arg); break; + case 3: UNW_DEC_REG_SPREL(P7, UNW_REG_PSP, t, arg); break; + case 4: UNW_DEC_REG_WHEN(P7, UNW_REG_RP, t, arg); break; + case 5: UNW_DEC_REG_PSPREL(P7, UNW_REG_RP, t, arg); break; + case 6: UNW_DEC_REG_WHEN(P7, UNW_REG_PFS, t, arg); break; + case 7: UNW_DEC_REG_PSPREL(P7, UNW_REG_PFS, t, arg); break; + case 8: UNW_DEC_REG_WHEN(P7, UNW_REG_PR, t, arg); break; + case 9: UNW_DEC_REG_PSPREL(P7, UNW_REG_PR, t, arg); break; + case 10: UNW_DEC_REG_WHEN(P7, UNW_REG_LC, t, arg); break; + case 11: UNW_DEC_REG_PSPREL(P7, UNW_REG_LC, t, arg); break; + case 12: UNW_DEC_REG_WHEN(P7, UNW_REG_UNAT, t, arg); break; + case 13: UNW_DEC_REG_PSPREL(P7, UNW_REG_UNAT, t, arg); break; + case 14: UNW_DEC_REG_WHEN(P7, UNW_REG_FPSR, t, arg); break; + case 15: UNW_DEC_REG_PSPREL(P7, UNW_REG_FPSR, t, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + else + { + switch (code & 0xf) + { + case 0x0: /* p8 */ + { + r = *dp++; + t = unw_decode_uleb128 (&dp); + switch (r) + { + case 1: UNW_DEC_REG_SPREL(P8, UNW_REG_RP, t, arg); break; + case 2: UNW_DEC_REG_SPREL(P8, UNW_REG_PFS, t, arg); break; + case 3: UNW_DEC_REG_SPREL(P8, UNW_REG_PR, t, arg); break; + case 4: UNW_DEC_REG_SPREL(P8, UNW_REG_LC, t, arg); break; + case 5: UNW_DEC_REG_SPREL(P8, UNW_REG_UNAT, t, arg); break; + case 6: UNW_DEC_REG_SPREL(P8, UNW_REG_FPSR, t, arg); break; + case 7: UNW_DEC_REG_WHEN(P8, UNW_REG_BSP, t, arg); break; + case 8: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSP, t, arg); break; + case 9: UNW_DEC_REG_SPREL(P8, UNW_REG_BSP, t, arg); break; + case 10: UNW_DEC_REG_WHEN(P8, UNW_REG_BSPSTORE, t, arg); break; + case 11: UNW_DEC_REG_PSPREL(P8, UNW_REG_BSPSTORE, t, arg); break; + case 12: UNW_DEC_REG_SPREL(P8, UNW_REG_BSPSTORE, t, arg); break; + case 13: UNW_DEC_REG_WHEN(P8, UNW_REG_RNAT, t, arg); break; + case 14: UNW_DEC_REG_PSPREL(P8, UNW_REG_RNAT, t, arg); break; + case 15: UNW_DEC_REG_SPREL(P8, UNW_REG_RNAT, t, arg); break; + case 16: UNW_DEC_PRIUNAT_WHEN_GR(P8, t, arg); break; + case 17: UNW_DEC_PRIUNAT_PSPREL(P8, t, arg); break; + case 18: UNW_DEC_PRIUNAT_SPREL(P8, t, arg); break; + case 19: UNW_DEC_PRIUNAT_WHEN_MEM(P8, t, arg); break; + default: UNW_DEC_BAD_CODE(r); break; + } + } + break; + + case 0x1: + byte1 = *dp++; byte2 = *dp++; + UNW_DEC_GR_GR(P9, (byte1 & 0xf), (byte2 & 0x7f), arg); + break; + + case 0xf: /* p10 */ + byte1 = *dp++; byte2 = *dp++; + UNW_DEC_ABI(P10, byte1, byte2, arg); + break; + + case 0x9: + return unw_decode_x1 (dp, code, arg); + + case 0xa: + return unw_decode_x2 (dp, code, arg); + + case 0xb: + return unw_decode_x3 (dp, code, arg); + + case 0xc: + return unw_decode_x4 (dp, code, arg); + + default: + UNW_DEC_BAD_CODE(code); + break; + } + } + return dp; +} + +static unsigned char * +unw_decode_b1 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word label = (code & 0x1f); + + if ((code & 0x20) != 0) + UNW_DEC_COPY_STATE(B1, label, arg); + else + UNW_DEC_LABEL_STATE(B1, label, arg); + return dp; +} + +static unsigned char * +unw_decode_b2 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word t; + + t = unw_decode_uleb128 (&dp); + UNW_DEC_EPILOGUE(B2, t, (code & 0x1f), arg); + return dp; +} + +static unsigned char * +unw_decode_b3_x4 (unsigned char *dp, unsigned char code, void *arg) +{ + unw_word t, ecount, label; + + if ((code & 0x10) == 0) + { + t = unw_decode_uleb128 (&dp); + ecount = unw_decode_uleb128 (&dp); + UNW_DEC_EPILOGUE(B3, t, ecount, arg); + } + else if ((code & 0x07) == 0) + { + label = unw_decode_uleb128 (&dp); + if ((code & 0x08) != 0) + UNW_DEC_COPY_STATE(B4, label, arg); + else + UNW_DEC_LABEL_STATE(B4, label, arg); + } + else + switch (code & 0x7) + { + case 1: return unw_decode_x1 (dp, code, arg); + case 2: return unw_decode_x2 (dp, code, arg); + case 3: return unw_decode_x3 (dp, code, arg); + case 4: return unw_decode_x4 (dp, code, arg); + default: UNW_DEC_BAD_CODE(code); break; + } + return dp; +} + +typedef unsigned char *(*unw_decoder) (unsigned char *, unsigned char, void *); + +static unw_decoder unw_decode_table[2][8] = +{ + /* prologue table: */ + { + unw_decode_r1, /* 0 */ + unw_decode_r1, + unw_decode_r2, + unw_decode_r3, + unw_decode_p1, /* 4 */ + unw_decode_p2_p5, + unw_decode_p6, + unw_decode_p7_p10 + }, + { + unw_decode_r1, /* 0 */ + unw_decode_r1, + unw_decode_r2, + unw_decode_r3, + unw_decode_b1, /* 4 */ + unw_decode_b1, + unw_decode_b2, + unw_decode_b3_x4 + } +}; + +/* + * Decode one descriptor and return address of next descriptor. + */ +static inline unsigned char * +unw_decode (unsigned char *dp, int inside_body, void *arg) +{ + unw_decoder decoder; + unsigned char code; + + code = *dp++; + decoder = unw_decode_table[inside_body][code >> 5]; + dp = (*decoder) (dp, code, arg); + return dp; +} diff --git a/arch/ia64/kernel/unwind_i.h b/arch/ia64/kernel/unwind_i.h new file mode 100644 index 000000000000..1dd57ba44327 --- /dev/null +++ b/arch/ia64/kernel/unwind_i.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000, 2002-2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * Kernel unwind support. + */ + +#define UNW_VER(x) ((x) >> 48) +#define UNW_FLAG_MASK 0x0000ffff00000000 +#define UNW_FLAG_OSMASK 0x0000f00000000000 +#define UNW_FLAG_EHANDLER(x) ((x) & 0x0000000100000000L) +#define UNW_FLAG_UHANDLER(x) ((x) & 0x0000000200000000L) +#define UNW_LENGTH(x) ((x) & 0x00000000ffffffffL) + +enum unw_register_index { + /* primary unat: */ + UNW_REG_PRI_UNAT_GR, + UNW_REG_PRI_UNAT_MEM, + + /* register stack */ + UNW_REG_BSP, /* register stack pointer */ + UNW_REG_BSPSTORE, + UNW_REG_PFS, /* previous function state */ + UNW_REG_RNAT, + /* memory stack */ + UNW_REG_PSP, /* previous memory stack pointer */ + /* return pointer: */ + UNW_REG_RP, + + /* preserved registers: */ + UNW_REG_R4, UNW_REG_R5, UNW_REG_R6, UNW_REG_R7, + UNW_REG_UNAT, UNW_REG_PR, UNW_REG_LC, UNW_REG_FPSR, + UNW_REG_B1, UNW_REG_B2, UNW_REG_B3, UNW_REG_B4, UNW_REG_B5, + UNW_REG_F2, UNW_REG_F3, UNW_REG_F4, UNW_REG_F5, + UNW_REG_F16, UNW_REG_F17, UNW_REG_F18, UNW_REG_F19, + UNW_REG_F20, UNW_REG_F21, UNW_REG_F22, UNW_REG_F23, + UNW_REG_F24, UNW_REG_F25, UNW_REG_F26, UNW_REG_F27, + UNW_REG_F28, UNW_REG_F29, UNW_REG_F30, UNW_REG_F31, + UNW_NUM_REGS +}; + +struct unw_info_block { + u64 header; + u64 desc[]; /* unwind descriptors */ + /* personality routine and language-specific data follow behind descriptors */ +}; + +struct unw_table { + struct unw_table *next; /* must be first member! */ + const char *name; + unsigned long gp; /* global pointer for this load-module */ + unsigned long segment_base; /* base for offsets in the unwind table entries */ + unsigned long start; + unsigned long end; + const struct unw_table_entry *array; + unsigned long length; +}; + +enum unw_where { + UNW_WHERE_NONE, /* register isn't saved at all */ + UNW_WHERE_GR, /* register is saved in a general register */ + UNW_WHERE_FR, /* register is saved in a floating-point register */ + UNW_WHERE_BR, /* register is saved in a branch register */ + UNW_WHERE_SPREL, /* register is saved on memstack (sp-relative) */ + UNW_WHERE_PSPREL, /* register is saved on memstack (psp-relative) */ + /* + * At the end of each prologue these locations get resolved to + * UNW_WHERE_PSPREL and UNW_WHERE_GR, respectively: + */ + UNW_WHERE_SPILL_HOME, /* register is saved in its spill home */ + UNW_WHERE_GR_SAVE /* register is saved in next general register */ +}; + +#define UNW_WHEN_NEVER 0x7fffffff + +struct unw_reg_info { + unsigned long val; /* save location: register number or offset */ + enum unw_where where; /* where the register gets saved */ + int when; /* when the register gets saved */ +}; + +struct unw_reg_state { + struct unw_reg_state *next; /* next (outer) element on state stack */ + struct unw_reg_info reg[UNW_NUM_REGS]; /* register save locations */ +}; + +struct unw_labeled_state { + struct unw_labeled_state *next; /* next labeled state (or NULL) */ + unsigned long label; /* label for this state */ + struct unw_reg_state saved_state; +}; + +struct unw_state_record { + unsigned int first_region : 1; /* is this the first region? */ + unsigned int done : 1; /* are we done scanning descriptors? */ + unsigned int any_spills : 1; /* got any register spills? */ + unsigned int in_body : 1; /* are we inside a body (as opposed to a prologue)? */ + unsigned long flags; /* see UNW_FLAG_* in unwind.h */ + + u8 *imask; /* imask of spill_mask record or NULL */ + unsigned long pr_val; /* predicate values */ + unsigned long pr_mask; /* predicate mask */ + long spill_offset; /* psp-relative offset for spill base */ + int region_start; + int region_len; + int epilogue_start; + int epilogue_count; + int when_target; + + u8 gr_save_loc; /* next general register to use for saving a register */ + u8 return_link_reg; /* branch register in which the return link is passed */ + + struct unw_labeled_state *labeled_states; /* list of all labeled states */ + struct unw_reg_state curr; /* current state */ +}; + +enum unw_nat_type { + UNW_NAT_NONE, /* NaT not represented */ + UNW_NAT_VAL, /* NaT represented by NaT value (fp reg) */ + UNW_NAT_MEMSTK, /* NaT value is in unat word at offset OFF */ + UNW_NAT_REGSTK /* NaT is in rnat */ +}; + +enum unw_insn_opcode { + UNW_INSN_ADD, /* s[dst] += val */ + UNW_INSN_ADD_PSP, /* s[dst] = (s.psp + val) */ + UNW_INSN_ADD_SP, /* s[dst] = (s.sp + val) */ + UNW_INSN_MOVE, /* s[dst] = s[val] */ + UNW_INSN_MOVE2, /* s[dst] = s[val]; s[dst+1] = s[val+1] */ + UNW_INSN_MOVE_STACKED, /* s[dst] = ia64_rse_skip(*s.bsp, val) */ + UNW_INSN_SETNAT_MEMSTK, /* s[dst+1].nat.type = MEMSTK; + s[dst+1].nat.off = *s.pri_unat - s[dst] */ + UNW_INSN_SETNAT_TYPE, /* s[dst+1].nat.type = val */ + UNW_INSN_LOAD, /* s[dst] = *s[val] */ + UNW_INSN_MOVE_SCRATCH, /* s[dst] = scratch reg "val" */ + UNW_INSN_MOVE_CONST, /* s[dst] = constant reg "val" */ +}; + +struct unw_insn { + unsigned int opc : 4; + unsigned int dst : 9; + signed int val : 19; +}; + +/* + * Preserved general static registers (r4-r7) give rise to two script + * instructions; everything else yields at most one instruction; at + * the end of the script, the psp gets popped, accounting for one more + * instruction. + */ +#define UNW_MAX_SCRIPT_LEN (UNW_NUM_REGS + 5) + +struct unw_script { + unsigned long ip; /* ip this script is for */ + unsigned long pr_mask; /* mask of predicates script depends on */ + unsigned long pr_val; /* predicate values this script is for */ + rwlock_t lock; + unsigned int flags; /* see UNW_FLAG_* in unwind.h */ + unsigned short lru_chain; /* used for least-recently-used chain */ + unsigned short coll_chain; /* used for hash collisions */ + unsigned short hint; /* hint for next script to try (or -1) */ + unsigned short count; /* number of instructions in script */ + struct unw_insn insn[UNW_MAX_SCRIPT_LEN]; +}; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S new file mode 100644 index 000000000000..53dfde161c8a --- /dev/null +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -0,0 +1,224 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include +#include +#include +#include + +#define EMITS_PT_NOTE +#define RO_EXCEPTION_TABLE_ALIGN 16 + +#include + +OUTPUT_FORMAT("elf64-ia64-little") +OUTPUT_ARCH(ia64) +ENTRY(phys_start) +jiffies = jiffies_64; + +PHDRS { + text PT_LOAD; + percpu PT_LOAD; + data PT_LOAD; + note PT_NOTE; + unwind 0x70000001; /* PT_IA_64_UNWIND, but ld doesn't match the name */ +} + +SECTIONS { + /* + * unwind exit sections must be discarded before + * the rest of the sections get included. + */ + /DISCARD/ : { + *(.IA_64.unwind.exit.text) + *(.IA_64.unwind_info.exit.text) + *(.comment) + *(.note) + } + + v = PAGE_OFFSET; /* this symbol is here to make debugging easier... */ + phys_start = _start - LOAD_OFFSET; + + code : { + } :text + . = KERNEL_START; + + _text = .; + _stext = .; + + .text : AT(ADDR(.text) - LOAD_OFFSET) { + __start_ivt_text = .; + *(.text..ivt) + __end_ivt_text = .; + TEXT_TEXT + SCHED_TEXT + LOCK_TEXT + KPROBES_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.gnu.linkonce.t*) + } + + .text2 : AT(ADDR(.text2) - LOAD_OFFSET) { + *(.text2) + } + +#ifdef CONFIG_SMP + .text..lock : AT(ADDR(.text..lock) - LOAD_OFFSET) { + *(.text..lock) + } +#endif + _etext = .; + + /* + * Read-only data + */ + + /* MCA table */ + . = ALIGN(16); + __mca_table : AT(ADDR(__mca_table) - LOAD_OFFSET) { + __start___mca_table = .; + *(__mca_table) + __stop___mca_table = .; + } + + .data..patch.phys_stack_reg : AT(ADDR(.data..patch.phys_stack_reg) - LOAD_OFFSET) { + __start___phys_stack_reg_patchlist = .; + *(.data..patch.phys_stack_reg) + __end___phys_stack_reg_patchlist = .; + } + + /* + * Global data + */ + _data = .; + + /* Unwind info & table: */ + . = ALIGN(8); + .IA_64.unwind_info : AT(ADDR(.IA_64.unwind_info) - LOAD_OFFSET) { + *(.IA_64.unwind_info*) + } + .IA_64.unwind : AT(ADDR(.IA_64.unwind) - LOAD_OFFSET) { + __start_unwind = .; + *(.IA_64.unwind*) + __end_unwind = .; + } :text :unwind + code_continues2 : { + } :text + + RO_DATA(4096) + + .opd : AT(ADDR(.opd) - LOAD_OFFSET) { + __start_opd = .; + *(.opd) + __end_opd = .; + } + + /* + * Initialization code and data: + */ + . = ALIGN(PAGE_SIZE); + __init_begin = .; + + INIT_TEXT_SECTION(PAGE_SIZE) + INIT_DATA_SECTION(16) + + .data..patch.vtop : AT(ADDR(.data..patch.vtop) - LOAD_OFFSET) { + __start___vtop_patchlist = .; + *(.data..patch.vtop) + __end___vtop_patchlist = .; + } + + .data..patch.rse : AT(ADDR(.data..patch.rse) - LOAD_OFFSET) { + __start___rse_patchlist = .; + *(.data..patch.rse) + __end___rse_patchlist = .; + } + + .data..patch.mckinley_e9 : AT(ADDR(.data..patch.mckinley_e9) - LOAD_OFFSET) { + __start___mckinley_e9_bundles = .; + *(.data..patch.mckinley_e9) + __end___mckinley_e9_bundles = .; + } + +#ifdef CONFIG_SMP + . = ALIGN(PERCPU_PAGE_SIZE); + __cpu0_per_cpu = .; + . = . + PERCPU_PAGE_SIZE; /* cpu0 per-cpu space */ +#endif + + . = ALIGN(PAGE_SIZE); + __init_end = .; + + .data..page_aligned : AT(ADDR(.data..page_aligned) - LOAD_OFFSET) { + PAGE_ALIGNED_DATA(PAGE_SIZE) + . = ALIGN(PAGE_SIZE); + __start_gate_section = .; + *(.data..gate) + __stop_gate_section = .; + } + /* + * make sure the gate page doesn't expose + * kernel data + */ + . = ALIGN(PAGE_SIZE); + + /* Per-cpu data: */ + . = ALIGN(PERCPU_PAGE_SIZE); + PERCPU_VADDR(SMP_CACHE_BYTES, PERCPU_ADDR, :percpu) + __phys_per_cpu_start = __per_cpu_load; + /* + * ensure percpu data fits + * into percpu page size + */ + . = __phys_per_cpu_start + PERCPU_PAGE_SIZE; + + data : { + } :data + .data : AT(ADDR(.data) - LOAD_OFFSET) { + _sdata = .; + INIT_TASK_DATA(PAGE_SIZE) + CACHELINE_ALIGNED_DATA(SMP_CACHE_BYTES) + READ_MOSTLY_DATA(SMP_CACHE_BYTES) + DATA_DATA + *(.data1) + *(.gnu.linkonce.d*) + CONSTRUCTORS + } + + BUG_TABLE + + . = ALIGN(16); /* gp must be 16-byte aligned for exc. table */ + .got : AT(ADDR(.got) - LOAD_OFFSET) { + *(.got.plt) + *(.got) + } + __gp = ADDR(.got) + 0x200000; + + /* + * We want the small data sections together, + * so single-instruction offsets can access + * them all, and initialized data all before + * uninitialized, so we can shorten the + * on-disk segment size. + */ + .sdata : AT(ADDR(.sdata) - LOAD_OFFSET) { + *(.sdata) + *(.sdata1) + *(.srdata) + } + _edata = .; + + BSS_SECTION(0, 0, 0) + + _end = .; + + code : { + } :text + + STABS_DEBUG + DWARF_DEBUG + ELF_DETAILS + + /* Default discards */ + DISCARDS +} diff --git a/arch/ia64/lib/Makefile b/arch/ia64/lib/Makefile new file mode 100644 index 000000000000..081fcba01dc0 --- /dev/null +++ b/arch/ia64/lib/Makefile @@ -0,0 +1,48 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for ia64-specific library routines.. +# + +lib-y := io.o __divsi3.o __udivsi3.o __modsi3.o __umodsi3.o \ + __divdi3.o __udivdi3.o __moddi3.o __umoddi3.o \ + checksum.o clear_page.o csum_partial_copy.o \ + clear_user.o strncpy_from_user.o strnlen_user.o \ + flush.o ip_fast_csum.o do_csum.o \ + memset.o strlen.o xor.o + +lib-$(CONFIG_ITANIUM) += copy_page.o copy_user.o memcpy.o +lib-$(CONFIG_MCKINLEY) += copy_page_mck.o memcpy_mck.o + +AFLAGS___divdi3.o = +AFLAGS___udivdi3.o = -DUNSIGNED +AFLAGS___moddi3.o = -DMODULO +AFLAGS___umoddi3.o = -DUNSIGNED -DMODULO + +AFLAGS___divsi3.o = +AFLAGS___udivsi3.o = -DUNSIGNED +AFLAGS___modsi3.o = -DMODULO +AFLAGS___umodsi3.o = -DUNSIGNED -DMODULO + +$(obj)/__divdi3.o: $(src)/idiv64.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__udivdi3.o: $(src)/idiv64.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__moddi3.o: $(src)/idiv64.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__umoddi3.o: $(src)/idiv64.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__divsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__udivsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__modsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_rule,as_o_S) + +$(obj)/__umodsi3.o: $(src)/idiv32.S FORCE + $(call if_changed_rule,as_o_S) diff --git a/arch/ia64/lib/checksum.c b/arch/ia64/lib/checksum.c new file mode 100644 index 000000000000..d26517fe3500 --- /dev/null +++ b/arch/ia64/lib/checksum.c @@ -0,0 +1,102 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Network checksum routines + * + * Copyright (C) 1999, 2003 Hewlett-Packard Co + * Stephane Eranian + * + * Most of the code coming from arch/alpha/lib/checksum.c + * + * This file contains network checksum routines that are better done + * in an architecture-specific manner due to speed.. + */ + +#include +#include + +#include + +static inline unsigned short +from64to16 (unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * computes the checksum of the TCP/UDP pseudo-header + * returns a 16-bit checksum, already complemented. + */ +__sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + return (__force __sum16)~from64to16( + (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8)); +} + +EXPORT_SYMBOL(csum_tcpudp_magic); + +__wsum +csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + unsigned long result; + + result = (__force u64)saddr + (__force u64)daddr + + (__force u64)sum + ((len + proto) << 8); + + /* Fold down to 32-bits so we don't lose in the typedef-less network stack. */ + /* 64 to 33 */ + result = (result & 0xffffffff) + (result >> 32); + /* 33 to 32 */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} +EXPORT_SYMBOL(csum_tcpudp_nofold); + +extern unsigned long do_csum (const unsigned char *, long); + +/* + * computes the checksum of a memory block at buff, length len, + * and adds in "sum" (32-bit) + * + * returns a 32-bit number suitable for feeding into itself + * or csum_tcpudp_magic + * + * this function must be called with even lengths, except + * for the last fragment, which may be odd + * + * it's best to have buff aligned on a 32-bit boundary + */ +__wsum csum_partial(const void *buff, int len, __wsum sum) +{ + u64 result = do_csum(buff, len); + + /* add in old sum, and carry.. */ + result += (__force u32)sum; + /* 32+c bits -> 32 bits */ + result = (result & 0xffffffff) + (result >> 32); + return (__force __wsum)result; +} + +EXPORT_SYMBOL(csum_partial); + +/* + * this routine is used for miscellaneous IP-like checksums, mainly + * in icmp.c + */ +__sum16 ip_compute_csum (const void *buff, int len) +{ + return (__force __sum16)~do_csum(buff,len); +} + +EXPORT_SYMBOL(ip_compute_csum); diff --git a/arch/ia64/lib/clear_page.S b/arch/ia64/lib/clear_page.S new file mode 100644 index 000000000000..ba0dd2538fa5 --- /dev/null +++ b/arch/ia64/lib/clear_page.S @@ -0,0 +1,79 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1999-2002 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + * Copyright (C) 2002 Ken Chen + * + * 1/06/01 davidm Tuned for Itanium. + * 2/12/02 kchen Tuned for both Itanium and McKinley + * 3/08/02 davidm Some more tweaking + */ + +#include +#include +#include + +#ifdef CONFIG_ITANIUM +# define L3_LINE_SIZE 64 // Itanium L3 line size +# define PREFETCH_LINES 9 // magic number +#else +# define L3_LINE_SIZE 128 // McKinley L3 line size +# define PREFETCH_LINES 12 // magic number +#endif + +#define saved_lc r2 +#define dst_fetch r3 +#define dst1 r8 +#define dst2 r9 +#define dst3 r10 +#define dst4 r11 + +#define dst_last r31 + +GLOBAL_ENTRY(clear_page) + .prologue + .regstk 1,0,0,0 + mov r16 = PAGE_SIZE/L3_LINE_SIZE-1 // main loop count, -1=repeat/until + .save ar.lc, saved_lc + mov saved_lc = ar.lc + + .body + mov ar.lc = (PREFETCH_LINES - 1) + mov dst_fetch = in0 + adds dst1 = 16, in0 + adds dst2 = 32, in0 + ;; +.fetch: stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + adds dst3 = 48, in0 // executing this multiple times is harmless + br.cloop.sptk.few .fetch + ;; + addl dst_last = (PAGE_SIZE - PREFETCH_LINES*L3_LINE_SIZE), dst_fetch + mov ar.lc = r16 // one L3 line per iteration + adds dst4 = 64, in0 + ;; +#ifdef CONFIG_ITANIUM + // Optimized for Itanium +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + cmp.lt p8,p0=dst_fetch, dst_last + ;; +#else + // Optimized for McKinley +1: stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 + stf.spill.nta [dst3] = f0, 64 + stf.spill.nta [dst4] = f0, 128 + cmp.lt p8,p0=dst_fetch, dst_last + ;; + stf.spill.nta [dst1] = f0, 64 + stf.spill.nta [dst2] = f0, 64 +#endif + stf.spill.nta [dst3] = f0, 64 +(p8) stf.spill.nta [dst_fetch] = f0, L3_LINE_SIZE + br.cloop.sptk.few 1b + ;; + mov ar.lc = saved_lc // restore lc + br.ret.sptk.many rp +END(clear_page) +EXPORT_SYMBOL(clear_page) diff --git a/arch/ia64/lib/clear_user.S b/arch/ia64/lib/clear_user.S new file mode 100644 index 000000000000..1d9e45ccf8e5 --- /dev/null +++ b/arch/ia64/lib/clear_user.S @@ -0,0 +1,212 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This routine clears to zero a linear memory buffer in user space. + * + * Inputs: + * in0: address of buffer + * in1: length of buffer in bytes + * Outputs: + * r8: number of bytes that didn't get cleared due to a fault + * + * Copyright (C) 1998, 1999, 2001 Hewlett-Packard Co + * Stephane Eranian + */ + +#include +#include + +// +// arguments +// +#define buf r32 +#define len r33 + +// +// local registers +// +#define cnt r16 +#define buf2 r17 +#define saved_lc r18 +#define saved_pfs r19 +#define tmp r20 +#define len2 r21 +#define len3 r22 + +// +// Theory of operations: +// - we check whether or not the buffer is small, i.e., less than 17 +// in which case we do the byte by byte loop. +// +// - Otherwise we go progressively from 1 byte store to 8byte store in +// the head part, the body is a 16byte store loop and we finish we the +// tail for the last 15 bytes. +// The good point about this breakdown is that the long buffer handling +// contains only 2 branches. +// +// The reason for not using shifting & masking for both the head and the +// tail is to stay semantically correct. This routine is not supposed +// to write bytes outside of the buffer. While most of the time this would +// be ok, we can't tolerate a mistake. A classical example is the case +// of multithreaded code were to the extra bytes touched is actually owned +// by another thread which runs concurrently to ours. Another, less likely, +// example is with device drivers where reading an I/O mapped location may +// have side effects (same thing for writing). +// + +GLOBAL_ENTRY(__do_clear_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,2,0,0,0 + cmp.eq p6,p0=r0,len // check for zero length + .save ar.lc, saved_lc + mov saved_lc=ar.lc // preserve ar.lc (slow) + .body + ;; // avoid WAW on CFM + adds tmp=-1,len // br.ctop is repeat/until + mov ret0=len // return value is length at this point +(p6) br.ret.spnt.many rp + ;; + cmp.lt p6,p0=16,len // if len > 16 then long memset + mov ar.lc=tmp // initialize lc for small count +(p6) br.cond.dptk .long_do_clear + ;; // WAR on ar.lc + // + // worst case 16 iterations, avg 8 iterations + // + // We could have played with the predicates to use the extra + // M slot for 2 stores/iteration but the cost the initialization + // the various counters compared to how long the loop is supposed + // to last on average does not make this solution viable. + // +1: + EX( .Lexit1, st1 [buf]=r0,1 ) + adds len=-1,len // countdown length using len + br.cloop.dptk 1b + ;; // avoid RAW on ar.lc + // + // .Lexit4: comes from byte by byte loop + // len contains bytes left +.Lexit1: + mov ret0=len // faster than using ar.lc + mov ar.lc=saved_lc + br.ret.sptk.many rp // end of short clear_user + + + // + // At this point we know we have more than 16 bytes to copy + // so we focus on alignment (no branches required) + // + // The use of len/len2 for countdown of the number of bytes left + // instead of ret0 is due to the fact that the exception code + // changes the values of r8. + // +.long_do_clear: + tbit.nz p6,p0=buf,0 // odd alignment (for long_do_clear) + ;; + EX( .Lexit3, (p6) st1 [buf]=r0,1 ) // 1-byte aligned +(p6) adds len=-1,len;; // sync because buf is modified + tbit.nz p6,p0=buf,1 + ;; + EX( .Lexit3, (p6) st2 [buf]=r0,2 ) // 2-byte aligned +(p6) adds len=-2,len;; + tbit.nz p6,p0=buf,2 + ;; + EX( .Lexit3, (p6) st4 [buf]=r0,4 ) // 4-byte aligned +(p6) adds len=-4,len;; + tbit.nz p6,p0=buf,3 + ;; + EX( .Lexit3, (p6) st8 [buf]=r0,8 ) // 8-byte aligned +(p6) adds len=-8,len;; + shr.u cnt=len,4 // number of 128-bit (2x64bit) words + ;; + cmp.eq p6,p0=r0,cnt + adds tmp=-1,cnt +(p6) br.cond.dpnt .dotail // we have less than 16 bytes left + ;; + adds buf2=8,buf // setup second base pointer + mov ar.lc=tmp + ;; + + // + // 16bytes/iteration core loop + // + // The second store can never generate a fault because + // we come into the loop only when we are 16-byte aligned. + // This means that if we cross a page then it will always be + // in the first store and never in the second. + // + // + // We need to keep track of the remaining length. A possible (optimistic) + // way would be to use ar.lc and derive how many byte were left by + // doing : left= 16*ar.lc + 16. this would avoid the addition at + // every iteration. + // However we need to keep the synchronization point. A template + // M;;MB does not exist and thus we can keep the addition at no + // extra cycle cost (use a nop slot anyway). It also simplifies the + // (unlikely) error recovery code + // + +2: EX(.Lexit3, st8 [buf]=r0,16 ) + ;; // needed to get len correct when error + st8 [buf2]=r0,16 + adds len=-16,len + br.cloop.dptk 2b + ;; + mov ar.lc=saved_lc + // + // tail correction based on len only + // + // We alternate the use of len3,len2 to allow parallelism and correct + // error handling. We also reuse p6/p7 to return correct value. + // The addition of len2/len3 does not cost anything more compared to + // the regular memset as we had empty slots. + // +.dotail: + mov len2=len // for parallelization of error handling + mov len3=len + tbit.nz p6,p0=len,3 + ;; + EX( .Lexit2, (p6) st8 [buf]=r0,8 ) // at least 8 bytes +(p6) adds len3=-8,len2 + tbit.nz p7,p6=len,2 + ;; + EX( .Lexit2, (p7) st4 [buf]=r0,4 ) // at least 4 bytes +(p7) adds len2=-4,len3 + tbit.nz p6,p7=len,1 + ;; + EX( .Lexit2, (p6) st2 [buf]=r0,2 ) // at least 2 bytes +(p6) adds len3=-2,len2 + tbit.nz p7,p6=len,0 + ;; + EX( .Lexit2, (p7) st1 [buf]=r0 ) // only 1 byte left + mov ret0=r0 // success + br.ret.sptk.many rp // end of most likely path + + // + // Outlined error handling code + // + + // + // .Lexit3: comes from core loop, need restore pr/lc + // len contains bytes left + // + // + // .Lexit2: + // if p6 -> coming from st8 or st2 : len2 contains what's left + // if p7 -> coming from st4 or st1 : len3 contains what's left + // We must restore lc/pr even though might not have been used. +.Lexit2: + .pred.rel "mutex", p6, p7 +(p6) mov len=len2 +(p7) mov len=len3 + ;; + // + // .Lexit4: comes from head, need not restore pr/lc + // len contains bytes left + // +.Lexit3: + mov ret0=len + mov ar.lc=saved_lc + br.ret.sptk.many rp +END(__do_clear_user) +EXPORT_SYMBOL(__do_clear_user) diff --git a/arch/ia64/lib/copy_page.S b/arch/ia64/lib/copy_page.S new file mode 100644 index 000000000000..c0a0e6b2af00 --- /dev/null +++ b/arch/ia64/lib/copy_page.S @@ -0,0 +1,101 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Optimized version of the standard copy_page() function + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger + * + * 4/06/01 davidm Tuned to make it perform well both for cached and uncached copies. + */ +#include +#include +#include + +#define PIPE_DEPTH 3 +#define EPI p[PIPE_DEPTH-1] + +#define lcount r16 +#define saved_pr r17 +#define saved_lc r18 +#define saved_pfs r19 +#define src1 r20 +#define src2 r21 +#define tgt1 r22 +#define tgt2 r23 +#define srcf r24 +#define tgtf r25 +#define tgt_last r26 + +#define Nrot ((8*PIPE_DEPTH+7)&~7) + +GLOBAL_ENTRY(copy_page) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + + .rotr t1[PIPE_DEPTH], t2[PIPE_DEPTH], t3[PIPE_DEPTH], t4[PIPE_DEPTH], \ + t5[PIPE_DEPTH], t6[PIPE_DEPTH], t7[PIPE_DEPTH], t8[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + .save ar.lc, saved_lc + mov saved_lc=ar.lc + mov ar.ec=PIPE_DEPTH + + mov lcount=PAGE_SIZE/64-1 + .save pr, saved_pr + mov saved_pr=pr + mov pr.rot=1<<16 + + .body + + mov src1=in1 + adds src2=8,in1 + mov tgt_last = PAGE_SIZE + ;; + adds tgt2=8,in0 + add srcf=512,in1 + mov ar.lc=lcount + mov tgt1=in0 + add tgtf=512,in0 + add tgt_last = tgt_last, in0 + ;; +1: +(p[0]) ld8 t1[0]=[src1],16 +(EPI) st8 [tgt1]=t1[PIPE_DEPTH-1],16 +(p[0]) ld8 t2[0]=[src2],16 +(EPI) st8 [tgt2]=t2[PIPE_DEPTH-1],16 + cmp.ltu p6,p0 = tgtf, tgt_last + ;; +(p[0]) ld8 t3[0]=[src1],16 +(EPI) st8 [tgt1]=t3[PIPE_DEPTH-1],16 +(p[0]) ld8 t4[0]=[src2],16 +(EPI) st8 [tgt2]=t4[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t5[0]=[src1],16 +(EPI) st8 [tgt1]=t5[PIPE_DEPTH-1],16 +(p[0]) ld8 t6[0]=[src2],16 +(EPI) st8 [tgt2]=t6[PIPE_DEPTH-1],16 + ;; +(p[0]) ld8 t7[0]=[src1],16 +(EPI) st8 [tgt1]=t7[PIPE_DEPTH-1],16 +(p[0]) ld8 t8[0]=[src2],16 +(EPI) st8 [tgt2]=t8[PIPE_DEPTH-1],16 + +(p6) lfetch [srcf], 64 +(p6) lfetch [tgtf], 64 + br.ctop.sptk.few 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 // restore predicates + mov ar.pfs=saved_pfs + mov ar.lc=saved_lc + br.ret.sptk.many rp +END(copy_page) +EXPORT_SYMBOL(copy_page) diff --git a/arch/ia64/lib/copy_page_mck.S b/arch/ia64/lib/copy_page_mck.S new file mode 100644 index 000000000000..5e8bb4b4b535 --- /dev/null +++ b/arch/ia64/lib/copy_page_mck.S @@ -0,0 +1,188 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * McKinley-optimized version of copy_page(). + * + * Copyright (C) 2002 Hewlett-Packard Co + * David Mosberger + * + * Inputs: + * in0: address of target page + * in1: address of source page + * Output: + * no return value + * + * General idea: + * - use regular loads and stores to prefetch data to avoid consuming M-slot just for + * lfetches => good for in-cache performance + * - avoid l2 bank-conflicts by not storing into the same 16-byte bank within a single + * cycle + * + * Principle of operation: + * First, note that L1 has a line-size of 64 bytes and L2 a line-size of 128 bytes. + * To avoid secondary misses in L2, we prefetch both source and destination with a line-size + * of 128 bytes. When both of these lines are in the L2 and the first half of the + * source line is in L1, we start copying the remaining words. The second half of the + * source line is prefetched in an earlier iteration, so that by the time we start + * accessing it, it's also present in the L1. + * + * We use a software-pipelined loop to control the overall operation. The pipeline + * has 2*PREFETCH_DIST+K stages. The first PREFETCH_DIST stages are used for prefetching + * source cache-lines. The second PREFETCH_DIST stages are used for prefetching destination + * cache-lines, the last K stages are used to copy the cache-line words not copied by + * the prefetches. The four relevant points in the pipelined are called A, B, C, D: + * p[A] is TRUE if a source-line should be prefetched, p[B] is TRUE if a destination-line + * should be prefetched, p[C] is TRUE if the second half of an L2 line should be brought + * into L1D and p[D] is TRUE if a cacheline needs to be copied. + * + * This all sounds very complicated, but thanks to the modulo-scheduled loop support, + * the resulting code is very regular and quite easy to follow (once you get the idea). + * + * As a secondary optimization, the first 2*PREFETCH_DIST iterations are implemented + * as the separate .prefetch_loop. Logically, this loop performs exactly like the + * main-loop (.line_copy), but has all known-to-be-predicated-off instructions removed, + * so that each loop iteration is faster (again, good for cached case). + * + * When reading the code, it helps to keep the following picture in mind: + * + * word 0 word 1 + * +------+------+--- + * | v[x] | t1 | ^ + * | t2 | t3 | | + * | t4 | t5 | | + * | t6 | t7 | | 128 bytes + * | n[y] | t9 | | (L2 cache line) + * | t10 | t11 | | + * | t12 | t13 | | + * | t14 | t15 | v + * +------+------+--- + * + * Here, v[x] is copied by the (memory) prefetch. n[y] is loaded at p[C] + * to fetch the second-half of the L2 cache line into L1, and the tX words are copied in + * an order that avoids bank conflicts. + */ +#include +#include +#include + +#define PREFETCH_DIST 8 // McKinley sustains 16 outstanding L2 misses (8 ld, 8 st) + +#define src0 r2 +#define src1 r3 +#define dst0 r9 +#define dst1 r10 +#define src_pre_mem r11 +#define dst_pre_mem r14 +#define src_pre_l2 r15 +#define dst_pre_l2 r16 +#define t1 r17 +#define t2 r18 +#define t3 r19 +#define t4 r20 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! +#define t13 r21 +#define t15 r22 + +#define saved_lc r23 +#define saved_pr r24 + +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 3) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +GLOBAL_ENTRY(copy_page) + .prologue + alloc r8 = ar.pfs, 2, Nrot-2, 0, Nrot + + .rotr v[2*PREFETCH_DIST], n[D-C+1] + .rotp p[N] + + .save ar.lc, saved_lc + mov saved_lc = ar.lc + .save pr, saved_pr + mov saved_pr = pr + .body + + mov src_pre_mem = in1 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = in0 + mov ar.lc = 2*PREFETCH_DIST - 1 + + add src_pre_l2 = 8*8, in1 + add dst_pre_l2 = 8*8, in0 + add src0 = 8, in1 // first t1 src + add src1 = 3*8, in1 // first t3 src + add dst0 = 8, in0 // first t1 dst + add dst1 = 3*8, in0 // first t3 dst + mov t1 = (PAGE_SIZE/128) - (2*PREFETCH_DIST) - 1 + nop.m 0 + nop.i 0 + ;; + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 (br.ctop cleared it to zero) + mov ar.lc = t1 // with 64KB pages, t1 is too big to fit in 8 bits! + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +(p[D]) ld8 t2 = [src0], 3*8 // M0 +(p[D]) ld8 t4 = [src1], 3*8 // M1 +(p[B]) st8 [dst_pre_mem] = v[B], 128 // M2 prefetch dst from memory +(p[D]) st8 [dst_pre_l2] = n[D-C], 128 // M3 prefetch dst from L2 + ;; +(p[A]) ld8 v[A] = [src_pre_mem], 128 // M0 prefetch src from memory +(p[C]) ld8 n[0] = [src_pre_l2], 128 // M1 prefetch src from L2 +(p[D]) st8 [dst0] = t1, 8 // M2 +(p[D]) st8 [dst1] = t3, 8 // M3 + ;; +(p[D]) ld8 t5 = [src0], 8 +(p[D]) ld8 t7 = [src1], 3*8 +(p[D]) st8 [dst0] = t2, 3*8 +(p[D]) st8 [dst1] = t4, 3*8 + ;; +(p[D]) ld8 t6 = [src0], 3*8 +(p[D]) ld8 t10 = [src1], 8 +(p[D]) st8 [dst0] = t5, 8 +(p[D]) st8 [dst1] = t7, 3*8 + ;; +(p[D]) ld8 t9 = [src0], 3*8 +(p[D]) ld8 t11 = [src1], 3*8 +(p[D]) st8 [dst0] = t6, 3*8 +(p[D]) st8 [dst1] = t10, 8 + ;; +(p[D]) ld8 t12 = [src0], 8 +(p[D]) ld8 t14 = [src1], 8 +(p[D]) st8 [dst0] = t9, 3*8 +(p[D]) st8 [dst1] = t11, 3*8 + ;; +(p[D]) ld8 t13 = [src0], 4*8 +(p[D]) ld8 t15 = [src1], 4*8 +(p[D]) st8 [dst0] = t12, 8 +(p[D]) st8 [dst1] = t14, 8 + ;; +(p[D-1])ld8 t1 = [src0], 8 +(p[D-1])ld8 t3 = [src1], 8 +(p[D]) st8 [dst0] = t13, 4*8 +(p[D]) st8 [dst1] = t15, 4*8 + br.ctop.sptk .line_copy + ;; + mov ar.lc = saved_lc + mov pr = saved_pr, -1 + br.ret.sptk.many rp +END(copy_page) +EXPORT_SYMBOL(copy_page) diff --git a/arch/ia64/lib/copy_user.S b/arch/ia64/lib/copy_user.S new file mode 100644 index 000000000000..8daab72cfe77 --- /dev/null +++ b/arch/ia64/lib/copy_user.S @@ -0,0 +1,613 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Optimized version of the copy_user() routine. + * It is used to copy date across the kernel/user boundary. + * + * The source and destination are always on opposite side of + * the boundary. When reading from user space we must catch + * faults on loads. When writing to user space we must catch + * errors on stores. Note that because of the nature of the copy + * we don't need to worry about overlapping regions. + * + * + * Inputs: + * in0 address of source buffer + * in1 address of destination buffer + * in2 number of bytes to copy + * + * Outputs: + * ret0 0 in case of success. The number of bytes NOT copied in + * case of error. + * + * Copyright (C) 2000-2001 Hewlett-Packard Co + * Stephane Eranian + * + * Fixme: + * - handle the case where we have more than 16 bytes and the alignment + * are different. + * - more benchmarking + * - fix extraneous stop bit introduced by the EX() macro. + */ + +#include +#include + +// +// Tuneable parameters +// +#define COPY_BREAK 16 // we do byte copy below (must be >=16) +#define PIPE_DEPTH 21 // pipe depth + +#define EPI p[PIPE_DEPTH-1] + +// +// arguments +// +#define dst in0 +#define src in1 +#define len in2 + +// +// local registers +// +#define t1 r2 // rshift in bytes +#define t2 r3 // lshift in bytes +#define rshift r14 // right shift in bits +#define lshift r15 // left shift in bits +#define word1 r16 +#define word2 r17 +#define cnt r18 +#define len2 r19 +#define saved_lc r20 +#define saved_pr r21 +#define tmp r22 +#define val r23 +#define src1 r24 +#define dst1 r25 +#define src2 r26 +#define dst2 r27 +#define len1 r28 +#define enddst r29 +#define endsrc r30 +#define saved_pfs r31 + +GLOBAL_ENTRY(__copy_user) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,((2*PIPE_DEPTH+7)&~7),0,((2*PIPE_DEPTH+7)&~7) + + .rotr val1[PIPE_DEPTH],val2[PIPE_DEPTH] + .rotp p[PIPE_DEPTH] + + adds len2=-1,len // br.ctop is repeat/until + mov ret0=r0 + + ;; // RAW of cfm when len=0 + cmp.eq p8,p0=r0,len // check for zero length + .save ar.lc, saved_lc + mov saved_lc=ar.lc // preserve ar.lc (slow) +(p8) br.ret.spnt.many rp // empty mempcy() + ;; + add enddst=dst,len // first byte after end of source + add endsrc=src,len // first byte after end of destination + .save pr, saved_pr + mov saved_pr=pr // preserve predicates + + .body + + mov dst1=dst // copy because of rotation + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + + mov src1=src // copy because of rotation + mov ar.lc=len2 // initialize lc for small count + cmp.lt p10,p7=COPY_BREAK,len // if len > COPY_BREAK then long copy + + xor tmp=src,dst // same alignment test prepare +(p10) br.cond.dptk .long_copy_user + ;; // RAW pr.rot/p16 ? + // + // Now we do the byte by byte loop with software pipeline + // + // p7 is necessarily false by now +1: + EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 1b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs // restore ar.ec + br.ret.sptk.many rp // end of short memcpy + + // + // Not 8-byte aligned + // +.diff_align_copy_user: + // At this point we know we have more than 16 bytes to copy + // and also that src and dest do _not_ have the same alignment. + and src2=0x7,src1 // src offset + and dst2=0x7,dst1 // dst offset + ;; + // The basic idea is that we copy byte-by-byte at the head so + // that we can reach 8-byte alignment for both src1 and dst1. + // Then copy the body using software pipelined 8-byte copy, + // shifting the two back-to-back words right and left, then copy + // the tail by copying byte-by-byte. + // + // Fault handling. If the byte-by-byte at the head fails on the + // load, then restart and finish the pipleline by copying zeros + // to the dst1. Then copy zeros for the rest of dst1. + // If 8-byte software pipeline fails on the load, do the same as + // failure_in3 does. If the byte-by-byte at the tail fails, it is + // handled simply by failure_in_pipe1. + // + // The case p14 represents the source has more bytes in the + // the first word (by the shifted part), whereas the p15 needs to + // copy some bytes from the 2nd word of the source that has the + // tail of the 1st of the destination. + // + + // + // Optimization. If dst1 is 8-byte aligned (quite common), we don't need + // to copy the head to dst1, to start 8-byte copy software pipeline. + // We know src1 is not 8-byte aligned in this case. + // + cmp.eq p14,p15=r0,dst2 +(p15) br.cond.spnt 1f + ;; + sub t1=8,src2 + mov t2=src2 + ;; + shl rshift=t2,3 + sub len1=len,t1 // set len1 + ;; + sub lshift=64,rshift + ;; + br.cond.spnt .word_copy_user + ;; +1: + cmp.leu p14,p15=src2,dst2 + sub t1=dst2,src2 + ;; + .pred.rel "mutex", p14, p15 +(p14) sub word1=8,src2 // (8 - src offset) +(p15) sub t1=r0,t1 // absolute value +(p15) sub word1=8,dst2 // (8 - dst offset) + ;; + // For the case p14, we don't need to copy the shifted part to + // the 1st word of destination. + sub t2=8,t1 +(p14) sub word1=word1,t1 + ;; + sub len1=len,word1 // resulting len +(p15) shl rshift=t1,3 // in bits +(p14) shl rshift=t2,3 + ;; +(p14) sub len1=len1,t1 + adds cnt=-1,word1 + ;; + sub lshift=64,rshift + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=cnt + ;; +2: + EX(.failure_in_pipe2,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 2b + ;; + clrrrb + ;; +.word_copy_user: + cmp.gtu p9,p0=16,len1 +(p9) br.cond.spnt 4f // if (16 > len1) skip 8-byte copy + ;; + shr.u cnt=len1,3 // number of 64-bit words + ;; + adds cnt=-1,cnt + ;; + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t2 +(p15) sub src1=src1,t1 + // + // Now both src1 and dst1 point to an 8-byte aligned address. And + // we have more than 8 bytes to copy. + // + mov ar.lc=cnt + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + ;; +3: + // + // The pipleline consists of 3 stages: + // 1 (p16): Load a word from src1 + // 2 (EPI_1): Shift right pair, saving to tmp + // 3 (EPI): Store tmp to dst1 + // + // To make it simple, use at least 2 (p16) loops to set up val1[n] + // because we need 2 back-to-back val1[] to get tmp. + // Note that this implies EPI_2 must be p18 or greater. + // + +#define EPI_1 p[PIPE_DEPTH-2] +#define SWITCH(pred, shift) cmp.eq pred,p0=shift,rshift +#define CASE(pred, shift) \ + (pred) br.cond.spnt .copy_user_bit##shift +#define BODY(rshift) \ +.copy_user_bit##rshift: \ +1: \ + EX(.failure_out,(EPI) st8 [dst1]=tmp,8); \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ + EX(3f,(p16) ld8 val1[1]=[src1],8); \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk 1b; \ + ;; \ + br.cond.sptk.many .diff_align_do_tail; \ +2: \ +(EPI) st8 [dst1]=tmp,8; \ +(EPI_1) shrp tmp=val1[PIPE_DEPTH-2],val1[PIPE_DEPTH-1],rshift; \ +3: \ +(p16) mov val1[1]=r0; \ +(p16) mov val1[0]=r0; \ + br.ctop.dptk 2b; \ + ;; \ + br.cond.sptk.many .failure_in2 + + // + // Since the instruction 'shrp' requires a fixed 128-bit value + // specifying the bits to shift, we need to provide 7 cases + // below. + // + SWITCH(p6, 8) + SWITCH(p7, 16) + SWITCH(p8, 24) + SWITCH(p9, 32) + SWITCH(p10, 40) + SWITCH(p11, 48) + SWITCH(p12, 56) + ;; + CASE(p6, 8) + CASE(p7, 16) + CASE(p8, 24) + CASE(p9, 32) + CASE(p10, 40) + CASE(p11, 48) + CASE(p12, 56) + ;; + BODY(8) + BODY(16) + BODY(24) + BODY(32) + BODY(40) + BODY(48) + BODY(56) + ;; +.diff_align_do_tail: + .pred.rel "mutex", p14, p15 +(p14) sub src1=src1,t1 +(p14) adds dst1=-8,dst1 +(p15) sub dst1=dst1,t1 + ;; +4: + // Tail correction. + // + // The problem with this piplelined loop is that the last word is not + // loaded and thus parf of the last word written is not correct. + // To fix that, we simply copy the tail byte by byte. + + sub len1=endsrc,src1,1 + clrrrb + ;; + mov ar.ec=PIPE_DEPTH + mov pr.rot=1<<16 // p16=true all others are false + mov ar.lc=len1 + ;; +5: + EX(.failure_in_pipe1,(p16) ld1 val1[0]=[src1],1) + EX(.failure_out,(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1) + br.ctop.dptk.few 5b + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,0xffffffffffff0000 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // Beginning of long mempcy (i.e. > 16 bytes) + // +.long_copy_user: + tbit.nz p6,p7=src1,0 // odd alignment + and tmp=7,tmp + ;; + cmp.eq p10,p8=r0,tmp + mov len1=len // copy because of rotation +(p8) br.cond.dpnt .diff_align_copy_user + ;; + // At this point we know we have more than 16 bytes to copy + // and also that both src and dest have the same alignment + // which may not be the one we want. So for now we must move + // forward slowly until we reach 16byte alignment: no need to + // worry about reaching the end of buffer. + // + EX(.failure_in1,(p6) ld1 val1[0]=[src1],1) // 1-byte aligned +(p6) adds len1=-1,len1;; + tbit.nz p7,p0=src1,1 + ;; + EX(.failure_in1,(p7) ld2 val1[1]=[src1],2) // 2-byte aligned +(p7) adds len1=-2,len1;; + tbit.nz p8,p0=src1,2 + ;; + // + // Stop bit not required after ld4 because if we fail on ld4 + // we have never executed the ld1, therefore st1 is not executed. + // + EX(.failure_in1,(p8) ld4 val2[0]=[src1],4) // 4-byte aligned + ;; + EX(.failure_out,(p6) st1 [dst1]=val1[0],1) + tbit.nz p9,p0=src1,3 + ;; + // + // Stop bit not required after ld8 because if we fail on ld8 + // we have never executed the ld2, therefore st2 is not executed. + // + EX(.failure_in1,(p9) ld8 val2[1]=[src1],8) // 8-byte aligned + EX(.failure_out,(p7) st2 [dst1]=val1[1],2) +(p8) adds len1=-4,len1 + ;; + EX(.failure_out, (p8) st4 [dst1]=val2[0],4) +(p9) adds len1=-8,len1;; + shr.u cnt=len1,4 // number of 128-bit (2x64bit) words + ;; + EX(.failure_out, (p9) st8 [dst1]=val2[1],8) + tbit.nz p6,p0=len1,3 + cmp.eq p7,p0=r0,cnt + adds tmp=-1,cnt // br.ctop is repeat/until +(p7) br.cond.dpnt .dotail // we have less than 16 bytes left + ;; + adds src2=8,src1 + adds dst2=8,dst1 + mov ar.lc=tmp + ;; + // + // 16bytes/iteration + // +2: + EX(.failure_in3,(p16) ld8 val1[0]=[src1],16) +(p16) ld8 val2[0]=[src2],16 + + EX(.failure_out, (EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16) +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk 2b + ;; // RAW on src1 when fall through from loop + // + // Tail correction based on len only + // + // No matter where we come from (loop or test) the src1 pointer + // is 16 byte aligned AND we have less than 16 bytes to copy. + // +.dotail: + EX(.failure_in1,(p6) ld8 val1[0]=[src1],8) // at least 8 bytes + tbit.nz p7,p0=len1,2 + ;; + EX(.failure_in1,(p7) ld4 val1[1]=[src1],4) // at least 4 bytes + tbit.nz p8,p0=len1,1 + ;; + EX(.failure_in1,(p8) ld2 val2[0]=[src1],2) // at least 2 bytes + tbit.nz p9,p0=len1,0 + ;; + EX(.failure_out, (p6) st8 [dst1]=val1[0],8) + ;; + EX(.failure_in1,(p9) ld1 val2[1]=[src1]) // only 1 byte left + mov ar.lc=saved_lc + ;; + EX(.failure_out,(p7) st4 [dst1]=val1[1],4) + mov pr=saved_pr,0xffffffffffff0000 + ;; + EX(.failure_out, (p8) st2 [dst1]=val2[0],2) + mov ar.pfs=saved_pfs + ;; + EX(.failure_out, (p9) st1 [dst1]=val2[1]) + br.ret.sptk.many rp + + + // + // Here we handle the case where the byte by byte copy fails + // on the load. + // Several factors make the zeroing of the rest of the buffer kind of + // tricky: + // - the pipeline: loads/stores are not in sync (pipeline) + // + // In the same loop iteration, the dst1 pointer does not directly + // reflect where the faulty load was. + // + // - pipeline effect + // When you get a fault on load, you may have valid data from + // previous loads not yet store in transit. Such data must be + // store normally before moving onto zeroing the rest. + // + // - single/multi dispersal independence. + // + // solution: + // - we don't disrupt the pipeline, i.e. data in transit in + // the software pipeline will be eventually move to memory. + // We simply replace the load with a simple mov and keep the + // pipeline going. We can't really do this inline because + // p16 is always reset to 1 when lc > 0. + // +.failure_in_pipe1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +1: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk 1b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // This is the case where the byte by byte copy fails on the load + // when we copy the head. We need to finish the pipeline and copy + // zeros for the rest of the destination. Since this happens + // at the top we still need to fill the body and tail. +.failure_in_pipe2: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied +2: +(p16) mov val1[0]=r0 +(EPI) st1 [dst1]=val1[PIPE_DEPTH-1],1 + br.ctop.dptk 2b + ;; + sub len=enddst,dst1,1 // precompute len + br.cond.dptk.many .failure_in1bis + ;; + + // + // Here we handle the head & tail part when we check for alignment. + // The following code handles only the load failures. The + // main diffculty comes from the fact that loads/stores are + // scheduled. So when you fail on a load, the stores corresponding + // to previous successful loads must be executed. + // + // However some simplifications are possible given the way + // things work. + // + // 1) HEAD + // Theory of operation: + // + // Page A | Page B + // ---------|----- + // 1|8 x + // 1 2|8 x + // 4|8 x + // 1 4|8 x + // 2 4|8 x + // 1 2 4|8 x + // |1 + // |2 x + // |4 x + // + // page_size >= 4k (2^12). (x means 4, 2, 1) + // Here we suppose Page A exists and Page B does not. + // + // As we move towards eight byte alignment we may encounter faults. + // The numbers on each page show the size of the load (current alignment). + // + // Key point: + // - if you fail on 1, 2, 4 then you have never executed any smaller + // size loads, e.g. failing ld4 means no ld1 nor ld2 executed + // before. + // + // This allows us to simplify the cleanup code, because basically you + // only have to worry about "pending" stores in the case of a failing + // ld8(). Given the way the code is written today, this means only + // worry about st2, st4. There we can use the information encapsulated + // into the predicates. + // + // Other key point: + // - if you fail on the ld8 in the head, it means you went straight + // to it, i.e. 8byte alignment within an unexisting page. + // Again this comes from the fact that if you crossed just for the ld8 then + // you are 8byte aligned but also 16byte align, therefore you would + // either go for the 16byte copy loop OR the ld8 in the tail part. + // The combination ld1, ld2, ld4, ld8 where you fail on ld8 is impossible + // because it would mean you had 15bytes to copy in which case you + // would have defaulted to the byte by byte copy. + // + // + // 2) TAIL + // Here we now we have less than 16 bytes AND we are either 8 or 16 byte + // aligned. + // + // Key point: + // This means that we either: + // - are right on a page boundary + // OR + // - are at more than 16 bytes from a page boundary with + // at most 15 bytes to copy: no chance of crossing. + // + // This allows us to assume that if we fail on a load we haven't possibly + // executed any of the previous (tail) ones, so we don't need to do + // any stores. For instance, if we fail on ld2, this means we had + // 2 or 3 bytes left to copy and we did not execute the ld8 nor ld4. + // + // This means that we are in a situation similar the a fault in the + // head part. That's nice! + // +.failure_in1: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + sub len=endsrc,src1,1 + // + // we know that ret0 can never be zero at this point + // because we failed why trying to do a load, i.e. there is still + // some work to do. + // The failure_in1bis and length problem is taken care of at the + // calling side. + // + ;; +.failure_in1bis: // from (.failure_in3) + mov ar.lc=len // Continue with a stupid byte store. + ;; +5: + st1 [dst1]=r0,1 + br.cloop.dptk 5b + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // Here we simply restart the loop but instead + // of doing loads we fill the pipeline with zeroes + // We can't simply store r0 because we may have valid + // data in transit in the pipeline. + // ar.lc and ar.ec are setup correctly at this point + // + // we MUST use src1/endsrc here and not dst1/enddst because + // of the pipeline effect. + // +.failure_in3: + sub ret0=endsrc,src1 // number of bytes to zero, i.e. not copied + ;; +2: +(p16) mov val1[0]=r0 +(p16) mov val2[0]=r0 +(EPI) st8 [dst1]=val1[PIPE_DEPTH-1],16 +(EPI) st8 [dst2]=val2[PIPE_DEPTH-1],16 + br.ctop.dptk 2b + ;; + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? + sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk .failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + +.failure_in2: + sub ret0=endsrc,src1 + cmp.ne p6,p0=dst1,enddst // Do we need to finish the tail ? + sub len=enddst,dst1,1 // precompute len +(p6) br.cond.dptk .failure_in1bis + ;; + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + // + // handling of failures on stores: that's the easy part + // +.failure_out: + sub ret0=enddst,dst1 + mov pr=saved_pr,0xffffffffffff0000 + mov ar.lc=saved_lc + + mov ar.pfs=saved_pfs + br.ret.sptk.many rp +END(__copy_user) +EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c new file mode 100644 index 000000000000..917e3138b277 --- /dev/null +++ b/arch/ia64/lib/csum_partial_copy.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Network Checksum & Copy routine + * + * Copyright (C) 1999, 2003-2004 Hewlett-Packard Co + * Stephane Eranian + * + * Most of the code has been imported from Linux/Alpha + */ + +#include +#include +#include + +#include + +/* + * XXX Fixme: those 2 inlines are meant for debugging and will go away + */ +static inline unsigned +short from64to16(unsigned long x) +{ + /* add up 32-bit words for 33 bits */ + x = (x & 0xffffffff) + (x >> 32); + /* add up 16-bit and 17-bit words for 17+c bits */ + x = (x & 0xffff) + (x >> 16); + /* add up 16-bit and 2-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. */ + x = (x & 0xffff) + (x >> 16); + return x; +} + +static inline +unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum) +{ + int odd, count; + unsigned long result = (unsigned long)psum; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long) buff; + if (odd) { + result = *buff << 8; + len--; + buff++; + } + count = len >> 1; /* nr of 16-bit words.. */ + if (count) { + if (2 & (unsigned long) buff) { + result += *(unsigned short *) buff; + count--; + len -= 2; + buff += 2; + } + count >>= 1; /* nr of 32-bit words.. */ + if (count) { + if (4 & (unsigned long) buff) { + result += *(unsigned int *) buff; + count--; + len -= 4; + buff += 4; + } + count >>= 1; /* nr of 64-bit words.. */ + if (count) { + unsigned long carry = 0; + do { + unsigned long w = *(unsigned long *) buff; + count--; + buff += 8; + result += carry; + result += w; + carry = (w > result); + } while (count); + result += carry; + result = (result & 0xffffffff) + (result >> 32); + } + if (len & 4) { + result += *(unsigned int *) buff; + buff += 4; + } + } + if (len & 2) { + result += *(unsigned short *) buff; + buff += 2; + } + } + if (len & 1) + result += *buff; + + result = from64to16(result); + + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); + +out: + return result; +} diff --git a/arch/ia64/lib/do_csum.S b/arch/ia64/lib/do_csum.S new file mode 100644 index 000000000000..6004dad2597c --- /dev/null +++ b/arch/ia64/lib/do_csum.S @@ -0,0 +1,324 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Optmized version of the standard do_csum() function + * + * Return: a 64bit quantity containing the 16bit Internet checksum + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer (int) + * + * Copyright (C) 1999, 2001-2002 Hewlett-Packard Co + * Stephane Eranian + * + * 02/04/22 Ken Chen + * Data locality study on the checksum buffer. + * More optimization cleanup - remove excessive stop bits. + * 02/04/08 David Mosberger + * More cleanup and tuning. + * 01/04/18 Jun Nakajima + * Clean up and optimize and the software pipeline, loading two + * back-to-back 8-byte words per loop. Clean up the initialization + * for the loop. Support the cases where load latency = 1 or 2. + * Set CONFIG_IA64_LOAD_LATENCY to 1 or 2 (default). + */ + +#include + +// +// Theory of operations: +// The goal is to go as quickly as possible to the point where +// we can checksum 16 bytes/loop. Before reaching that point we must +// take care of incorrect alignment of first byte. +// +// The code hereafter also takes care of the "tail" part of the buffer +// before entering the core loop, if any. The checksum is a sum so it +// allows us to commute operations. So we do the "head" and "tail" +// first to finish at full speed in the body. Once we get the head and +// tail values, we feed them into the pipeline, very handy initialization. +// +// Of course we deal with the special case where the whole buffer fits +// into one 8 byte word. In this case we have only one entry in the pipeline. +// +// We use a (LOAD_LATENCY+2)-stage pipeline in the loop to account for +// possible load latency and also to accommodate for head and tail. +// +// The end of the function deals with folding the checksum from 64bits +// down to 16bits taking care of the carry. +// +// This version avoids synchronization in the core loop by also using a +// pipeline for the accumulation of the checksum in resultx[] (x=1,2). +// +// wordx[] (x=1,2) +// |---| +// | | 0 : new value loaded in pipeline +// |---| +// | | - : in transit data +// |---| +// | | LOAD_LATENCY : current value to add to checksum +// |---| +// | | LOAD_LATENCY+1 : previous value added to checksum +// |---| (previous iteration) +// +// resultx[] (x=1,2) +// |---| +// | | 0 : initial value +// |---| +// | | LOAD_LATENCY-1 : new checksum +// |---| +// | | LOAD_LATENCY : previous value of checksum +// |---| +// | | LOAD_LATENCY+1 : final checksum when out of the loop +// |---| +// +// +// See RFC1071 "Computing the Internet Checksum" for various techniques for +// calculating the Internet checksum. +// +// NOT YET DONE: +// - Maybe another algorithm which would take care of the folding at the +// end in a different manner +// - Work with people more knowledgeable than me on the network stack +// to figure out if we could not split the function depending on the +// type of packet or alignment we get. Like the ip_fast_csum() routine +// where we know we have at least 20bytes worth of data to checksum. +// - Do a better job of handling small packets. +// - Note on prefetching: it was found that under various load, i.e. ftp read/write, +// nfs read/write, the L1 cache hit rate is at 60% and L2 cache hit rate is at 99.8% +// on the data that buffer points to (partly because the checksum is often preceded by +// a copy_from_user()). This finding indiate that lfetch will not be beneficial since +// the data is already in the cache. +// + +#define saved_pfs r11 +#define hmask r16 +#define tmask r17 +#define first1 r18 +#define firstval r19 +#define firstoff r20 +#define last r21 +#define lastval r22 +#define lastoff r23 +#define saved_lc r24 +#define saved_pr r25 +#define tmp1 r26 +#define tmp2 r27 +#define tmp3 r28 +#define carry1 r29 +#define carry2 r30 +#define first2 r31 + +#define buf in0 +#define len in1 + +#define LOAD_LATENCY 2 // XXX fix me + +#if (LOAD_LATENCY != 1) && (LOAD_LATENCY != 2) +# error "Only 1 or 2 is supported/tested for LOAD_LATENCY." +#endif + +#define PIPE_DEPTH (LOAD_LATENCY+2) +#define ELD p[LOAD_LATENCY] // end of load +#define ELD_1 p[LOAD_LATENCY+1] // and next stage + +// unsigned long do_csum(unsigned char *buf,long len) + +GLOBAL_ENTRY(do_csum) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,2,16,0,16 + .rotr word1[4], word2[4],result1[LOAD_LATENCY+2],result2[LOAD_LATENCY+2] + .rotp p[PIPE_DEPTH], pC1[2], pC2[2] + mov ret0=r0 // in case we have zero length + cmp.lt p0,p6=r0,len // check for zero length or negative (32bit len) + ;; + add tmp1=buf,len // last byte's address + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) +(p6) br.ret.spnt.many rp // return if zero or negative length + + mov hmask=-1 // initialize head mask + tbit.nz p15,p0=buf,0 // is buf an odd address? + and first1=-8,buf // 8-byte align down address of first1 element + + and firstoff=7,buf // how many bytes off for first1 element + mov tmask=-1 // initialize tail mask + + ;; + adds tmp2=-1,tmp1 // last-1 + and lastoff=7,tmp1 // how many bytes off for last element + ;; + sub tmp1=8,lastoff // complement to lastoff + and last=-8,tmp2 // address of word containing last byte + ;; + sub tmp3=last,first1 // tmp3=distance from first1 to last + .save ar.lc, saved_lc + mov saved_lc=ar.lc // save lc + cmp.eq p8,p9=last,first1 // everything fits in one word ? + + ld8 firstval=[first1],8 // load, ahead of time, "first1" word + and tmp1=7, tmp1 // make sure that if tmp1==8 -> tmp1=0 + shl tmp2=firstoff,3 // number of bits + ;; +(p9) ld8 lastval=[last] // load, ahead of time, "last" word, if needed + shl tmp1=tmp1,3 // number of bits +(p9) adds tmp3=-8,tmp3 // effectively loaded + ;; +(p8) mov lastval=r0 // we don't need lastval if first1==last + shl hmask=hmask,tmp2 // build head mask, mask off [0,first1off[ + shr.u tmask=tmask,tmp1 // build tail mask, mask off ]8,lastoff] + ;; + .body +#define count tmp3 + +(p8) and hmask=hmask,tmask // apply tail mask to head mask if 1 word only +(p9) and word2[0]=lastval,tmask // mask last it as appropriate + shr.u count=count,3 // how many 8-byte? + ;; + // If count is odd, finish this 8-byte word so that we can + // load two back-to-back 8-byte words per loop thereafter. + and word1[0]=firstval,hmask // and mask it as appropriate + tbit.nz p10,p11=count,0 // if (count is odd) + ;; +(p8) mov result1[0]=word1[0] +(p9) add result1[0]=word1[0],word2[0] + ;; + cmp.ltu p6,p0=result1[0],word1[0] // check the carry + cmp.eq.or.andcm p8,p0=0,count // exit if zero 8-byte + ;; +(p6) adds result1[0]=1,result1[0] +(p8) br.cond.dptk .do_csum_exit // if (within an 8-byte word) +(p11) br.cond.dptk .do_csum16 // if (count is even) + + // Here count is odd. + ld8 word1[1]=[first1],8 // load an 8-byte word + cmp.eq p9,p10=1,count // if (count == 1) + adds count=-1,count // loaded an 8-byte word + ;; + add result1[0]=result1[0],word1[1] + ;; + cmp.ltu p6,p0=result1[0],word1[1] + ;; +(p6) adds result1[0]=1,result1[0] +(p9) br.cond.sptk .do_csum_exit // if (count == 1) exit + // Fall through to calculate the checksum, feeding result1[0] as + // the initial value in result1[0]. + // + // Calculate the checksum loading two 8-byte words per loop. + // +.do_csum16: + add first2=8,first1 + shr.u count=count,1 // we do 16 bytes per loop + ;; + adds count=-1,count + mov carry1=r0 + mov carry2=r0 + brp.loop.imp 1f,2f + ;; + mov ar.ec=PIPE_DEPTH + mov ar.lc=count // set lc + mov pr.rot=1<<16 + // result1[0] must be initialized in advance. + mov result2[0]=r0 + ;; + .align 32 +1: +(ELD_1) cmp.ltu pC1[0],p0=result1[LOAD_LATENCY],word1[LOAD_LATENCY+1] +(pC1[1])adds carry1=1,carry1 +(ELD_1) cmp.ltu pC2[0],p0=result2[LOAD_LATENCY],word2[LOAD_LATENCY+1] +(pC2[1])adds carry2=1,carry2 +(ELD) add result1[LOAD_LATENCY-1]=result1[LOAD_LATENCY],word1[LOAD_LATENCY] +(ELD) add result2[LOAD_LATENCY-1]=result2[LOAD_LATENCY],word2[LOAD_LATENCY] +2: +(p[0]) ld8 word1[0]=[first1],16 +(p[0]) ld8 word2[0]=[first2],16 + br.ctop.sptk 1b + ;; + // Since len is a 32-bit value, carry cannot be larger than a 64-bit value. +(pC1[1])adds carry1=1,carry1 // since we miss the last one +(pC2[1])adds carry2=1,carry2 + ;; + add result1[LOAD_LATENCY+1]=result1[LOAD_LATENCY+1],carry1 + add result2[LOAD_LATENCY+1]=result2[LOAD_LATENCY+1],carry2 + ;; + cmp.ltu p6,p0=result1[LOAD_LATENCY+1],carry1 + cmp.ltu p7,p0=result2[LOAD_LATENCY+1],carry2 + ;; +(p6) adds result1[LOAD_LATENCY+1]=1,result1[LOAD_LATENCY+1] +(p7) adds result2[LOAD_LATENCY+1]=1,result2[LOAD_LATENCY+1] + ;; + add result1[0]=result1[LOAD_LATENCY+1],result2[LOAD_LATENCY+1] + ;; + cmp.ltu p6,p0=result1[0],result2[LOAD_LATENCY+1] + ;; +(p6) adds result1[0]=1,result1[0] + ;; +.do_csum_exit: + // + // now fold 64 into 16 bits taking care of carry + // that's not very good because it has lots of sequentiality + // + mov tmp3=0xffff + zxt4 tmp1=result1[0] + shr.u tmp2=result1[0],32 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add result1[0]=tmp1,tmp2 + ;; + and tmp1=result1[0],tmp3 + shr.u tmp2=result1[0],16 + ;; + add ret0=tmp1,tmp2 + mov pr=saved_pr,0xffffffffffff0000 + ;; + // if buf was odd then swap bytes + mov ar.pfs=saved_pfs // restore ar.ec +(p15) mux1 ret0=ret0,@rev // reverse word + ;; + mov ar.lc=saved_lc +(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes + br.ret.sptk.many rp + +// I (Jun Nakajima) wrote an equivalent code (see below), but it was +// not much better than the original. So keep the original there so that +// someone else can challenge. +// +// shr.u word1[0]=result1[0],32 +// zxt4 result1[0]=result1[0] +// ;; +// add result1[0]=result1[0],word1[0] +// ;; +// zxt2 result2[0]=result1[0] +// extr.u word1[0]=result1[0],16,16 +// shr.u carry1=result1[0],32 +// ;; +// add result2[0]=result2[0],word1[0] +// ;; +// add result2[0]=result2[0],carry1 +// ;; +// extr.u ret0=result2[0],16,16 +// ;; +// add ret0=ret0,result2[0] +// ;; +// zxt2 ret0=ret0 +// mov ar.pfs=saved_pfs // restore ar.ec +// mov pr=saved_pr,0xffffffffffff0000 +// ;; +// // if buf was odd then swap bytes +// mov ar.lc=saved_lc +//(p15) mux1 ret0=ret0,@rev // reverse word +// ;; +//(p15) shr.u ret0=ret0,64-16 // + shift back to position = swap bytes +// br.ret.sptk.many rp + +END(do_csum) diff --git a/arch/ia64/lib/flush.S b/arch/ia64/lib/flush.S new file mode 100644 index 000000000000..f8e795fe45cb --- /dev/null +++ b/arch/ia64/lib/flush.S @@ -0,0 +1,119 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Cache flushing routines. + * + * Copyright (C) 1999-2001, 2005 Hewlett-Packard Co + * David Mosberger-Tang + * + * 05/28/05 Zoltan Menyhart Dynamic stride size + */ + +#include +#include + + /* + * flush_icache_range(start,end) + * + * Make i-cache(s) coherent with d-caches. + * + * Must deal with range from start to end-1 but nothing else (need to + * be careful not to touch addresses that may be unmapped). + * + * Note: "in0" and "in1" are preserved for debugging purposes. + */ + .section .kprobes.text,"ax" +GLOBAL_ENTRY(flush_icache_range) + + .prologue + alloc r2=ar.pfs,2,0,0,0 + movl r3=ia64_i_cache_stride_shift + mov r21=1 + ;; + ld8 r20=[r3] // r20: stride shift + sub r22=in1,r0,1 // last byte address + ;; + shr.u r23=in0,r20 // start / (stride size) + shr.u r22=r22,r20 // (last byte address) / (stride size) + shl r21=r21,r20 // r21: stride size of the i-cache(s) + ;; + sub r8=r22,r23 // number of strides - 1 + shl r24=r23,r20 // r24: addresses for "fc.i" = + // "start" rounded down to stride boundary + .save ar.lc,r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + mov ar.lc=r8 + ;; + /* + * 32 byte aligned loop, even number of (actually 2) bundles + */ +.Loop: fc.i r24 // issuable on M0 only + add r24=r21,r24 // we flush "stride size" bytes per iteration + nop.i 0 + br.cloop.sptk.few .Loop + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(flush_icache_range) +EXPORT_SYMBOL_GPL(flush_icache_range) + + /* + * clflush_cache_range(start,size) + * + * Flush cache lines from start to start+size-1. + * + * Must deal with range from start to start+size-1 but nothing else + * (need to be careful not to touch addresses that may be + * unmapped). + * + * Note: "in0" and "in1" are preserved for debugging purposes. + */ + .section .kprobes.text,"ax" +GLOBAL_ENTRY(clflush_cache_range) + + .prologue + alloc r2=ar.pfs,2,0,0,0 + movl r3=ia64_cache_stride_shift + mov r21=1 + add r22=in1,in0 + ;; + ld8 r20=[r3] // r20: stride shift + sub r22=r22,r0,1 // last byte address + ;; + shr.u r23=in0,r20 // start / (stride size) + shr.u r22=r22,r20 // (last byte address) / (stride size) + shl r21=r21,r20 // r21: stride size of the i-cache(s) + ;; + sub r8=r22,r23 // number of strides - 1 + shl r24=r23,r20 // r24: addresses for "fc" = + // "start" rounded down to stride + // boundary + .save ar.lc,r3 + mov r3=ar.lc // save ar.lc + ;; + + .body + mov ar.lc=r8 + ;; + /* + * 32 byte aligned loop, even number of (actually 2) bundles + */ +.Loop_fc: + fc r24 // issuable on M0 only + add r24=r21,r24 // we flush "stride size" bytes per iteration + nop.i 0 + br.cloop.sptk.few .Loop_fc + ;; + sync.i + ;; + srlz.i + ;; + mov ar.lc=r3 // restore ar.lc + br.ret.sptk.many rp +END(clflush_cache_range) diff --git a/arch/ia64/lib/idiv32.S b/arch/ia64/lib/idiv32.S new file mode 100644 index 000000000000..83586fbc51ff --- /dev/null +++ b/arch/ia64/lib/idiv32.S @@ -0,0 +1,86 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2000 Hewlett-Packard Co + * Copyright (C) 2000 David Mosberger-Tang + * + * 32-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) + */ + +#include +#include + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define EXTEND zxt4 +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define EXTEND sxt4 +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,si3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + mov r2 = 0xffdd // r2 = -34 + 65535 (fp reg format bias) + EXTEND in0 = in0 // in0 = a + EXTEND in1 = in1 // in1 = b + ;; + setf.sig f8 = in0 + setf.sig f9 = in1 +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + setf.exp f7 = r2 // f7 = 2^-34 + frcpa.s1 f6, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f8 = f8, f6 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f6, f1 // e0 = -b*y0 + 1 + ;; +#ifdef MODULO + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f8 = f6, f8, f8 // q1 = e0*q0 + q0 +(p6) fma.s1 f6 = f6, f6, f7 // e1 = e0*e0 + 2^-34 + ;; +#ifdef MODULO + setf.sig f7 = in0 +#endif +(p6) fma.s1 f6 = f6, f8, f8 // q2 = e1*q1 + q1 + ;; + FP_TO_INT(f6, f6) // q = trunc(q2) + ;; +#ifdef MODULO + xma.l f6 = f6, f9, f7 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f6 // transfer result to result register + br.ret.sptk.many rp +END(NAME) +EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/idiv64.S b/arch/ia64/lib/idiv64.S new file mode 100644 index 000000000000..5c9113691f72 --- /dev/null +++ b/arch/ia64/lib/idiv64.S @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 1999-2000 Hewlett-Packard Co + * Copyright (C) 1999-2000 David Mosberger-Tang + * + * 64-bit integer division. + * + * This code is based on the application note entitled "Divide, Square Root + * and Remainder Algorithms for the IA-64 Architecture". This document + * is available as Intel document number 248725-002 or via the web at + * http://developer.intel.com/software/opensource/numerics/ + * + * For more details on the theory behind these algorithms, see "IA-64 + * and Elementary Functions" by Peter Markstein; HP Professional Books + * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions) + */ + +#include +#include + +#ifdef MODULO +# define OP mod +#else +# define OP div +#endif + +#ifdef UNSIGNED +# define SGN u +# define INT_TO_FP(a,b) fcvt.xuf.s1 a=b +# define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b +#else +# define SGN +# define INT_TO_FP(a,b) fcvt.xf a=b +# define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b +#endif + +#define PASTE1(a,b) a##b +#define PASTE(a,b) PASTE1(a,b) +#define NAME PASTE(PASTE(__,SGN),PASTE(OP,di3)) + +GLOBAL_ENTRY(NAME) + .regstk 2,0,0,0 + // Transfer inputs to FP registers. + setf.sig f8 = in0 + setf.sig f9 = in1 + ;; + // Convert the inputs to FP, to avoid FP software-assist faults. + INT_TO_FP(f8, f8) + INT_TO_FP(f9, f9) + ;; + frcpa.s1 f11, p6 = f8, f9 // y0 = frcpa(b) + ;; +(p6) fmpy.s1 f7 = f8, f11 // q0 = a*y0 +(p6) fnma.s1 f6 = f9, f11, f1 // e0 = -b*y0 + 1 + ;; +(p6) fma.s1 f10 = f7, f6, f7 // q1 = q0*e0 + q0 +(p6) fmpy.s1 f7 = f6, f6 // e1 = e0*e0 + ;; +#ifdef MODULO + sub in1 = r0, in1 // in1 = -b +#endif +(p6) fma.s1 f10 = f10, f7, f10 // q2 = q1*e1 + q1 +(p6) fma.s1 f6 = f11, f6, f11 // y1 = y0*e0 + y0 + ;; +(p6) fma.s1 f6 = f6, f7, f6 // y2 = y1*e1 + y1 +(p6) fnma.s1 f7 = f9, f10, f8 // r = -b*q2 + a + ;; +#ifdef MODULO + setf.sig f8 = in0 // f8 = a + setf.sig f9 = in1 // f9 = -b +#endif +(p6) fma.s1 f11 = f7, f6, f10 // q3 = r*y2 + q2 + ;; + FP_TO_INT(f11, f11) // q = trunc(q3) + ;; +#ifdef MODULO + xma.l f11 = f11, f9, f8 // r = q*(-b) + a + ;; +#endif + getf.sig r8 = f11 // transfer result to result register + br.ret.sptk.many rp +END(NAME) +EXPORT_SYMBOL(NAME) diff --git a/arch/ia64/lib/io.c b/arch/ia64/lib/io.c new file mode 100644 index 000000000000..c3e02462ed16 --- /dev/null +++ b/arch/ia64/lib/io.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +#include + +/* + * Copy data from IO memory space to "real" memory space. + * This needs to be optimized. + */ +void memcpy_fromio(void *to, const volatile void __iomem *from, long count) +{ + char *dst = to; + + while (count) { + count--; + *dst++ = readb(from++); + } +} +EXPORT_SYMBOL(memcpy_fromio); + +/* + * Copy data from "real" memory space to IO memory space. + * This needs to be optimized. + */ +void memcpy_toio(volatile void __iomem *to, const void *from, long count) +{ + const char *src = from; + + while (count) { + count--; + writeb(*src++, to++); + } +} +EXPORT_SYMBOL(memcpy_toio); + +/* + * "memset" on IO memory space. + * This needs to be optimized. + */ +void memset_io(volatile void __iomem *dst, int c, long count) +{ + unsigned char ch = (char)(c & 0xff); + + while (count) { + count--; + writeb(ch, dst); + dst++; + } +} +EXPORT_SYMBOL(memset_io); diff --git a/arch/ia64/lib/ip_fast_csum.S b/arch/ia64/lib/ip_fast_csum.S new file mode 100644 index 000000000000..fcc0b812ce2e --- /dev/null +++ b/arch/ia64/lib/ip_fast_csum.S @@ -0,0 +1,148 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Optmized version of the ip_fast_csum() function + * Used for calculating IP header checksum + * + * Return: 16bit checksum, complemented + * + * Inputs: + * in0: address of buffer to checksum (char *) + * in1: length of the buffer (int) + * + * Copyright (C) 2002, 2006 Intel Corp. + * Copyright (C) 2002, 2006 Ken Chen + */ + +#include +#include + +/* + * Since we know that most likely this function is called with buf aligned + * on 4-byte boundary and 20 bytes in length, we can execution rather quickly + * versus calling generic version of do_csum, which has lots of overhead in + * handling various alignments and sizes. However, due to lack of constrains + * put on the function input argument, cases with alignment not on 4-byte or + * size not equal to 20 bytes will be handled by the generic do_csum function. + */ + +#define in0 r32 +#define in1 r33 +#define in2 r34 +#define in3 r35 +#define in4 r36 +#define ret0 r8 + +GLOBAL_ENTRY(ip_fast_csum) + .prologue + .body + cmp.ne p6,p7=5,in1 // size other than 20 byte? + and r14=3,in0 // is it aligned on 4-byte? + add r15=4,in0 // second source pointer + ;; + cmp.ne.or.andcm p6,p7=r14,r0 + ;; +(p7) ld4 r20=[in0],8 +(p7) ld4 r21=[r15],8 +(p6) br.spnt .generic + ;; + ld4 r22=[in0],8 + ld4 r23=[r15],8 + ;; + ld4 r24=[in0] + add r20=r20,r21 + add r22=r22,r23 + ;; + add r20=r20,r22 + ;; + add r20=r20,r24 + ;; + shr.u ret0=r20,16 // now need to add the carry + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + shr.u ret0=r20,16 // add carry again + zxt2 r20=r20 + ;; + add r20=ret0,r20 + ;; + shr.u ret0=r20,16 + zxt2 r20=r20 + ;; + add r20=ret0,r20 + mov r9=0xffff + ;; + andcm ret0=r9,r20 + .restore sp // reset frame state + br.ret.sptk.many b0 + ;; + +.generic: + .prologue + .save ar.pfs, r35 + alloc r35=ar.pfs,2,2,2,0 + .save rp, r34 + mov r34=b0 + .body + dep.z out1=in1,2,30 + mov out0=in0 + ;; + br.call.sptk.many b0=do_csum + ;; + andcm ret0=-1,ret0 + mov ar.pfs=r35 + mov b0=r34 + br.ret.sptk.many b0 +END(ip_fast_csum) +EXPORT_SYMBOL(ip_fast_csum) + +GLOBAL_ENTRY(csum_ipv6_magic) + ld4 r20=[in0],4 + ld4 r21=[in1],4 + zxt4 in2=in2 + ;; + ld4 r22=[in0],4 + ld4 r23=[in1],4 + dep r15=in3,in2,32,16 + ;; + ld4 r24=[in0],4 + ld4 r25=[in1],4 + mux1 r15=r15,@rev + add r16=r20,r21 + add r17=r22,r23 + zxt4 in4=in4 + ;; + ld4 r26=[in0],4 + ld4 r27=[in1],4 + shr.u r15=r15,16 + add r18=r24,r25 + add r8=r16,r17 + ;; + add r19=r26,r27 + add r8=r8,r18 + ;; + add r8=r8,r19 + add r15=r15,in4 + ;; + add r8=r8,r15 + ;; + shr.u r10=r8,32 // now fold sum into short + zxt4 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // yeah, keep it rolling + zxt2 r11=r8 + ;; + add r8=r10,r11 + ;; + shr.u r10=r8,16 // three times lucky + zxt2 r11=r8 + ;; + add r8=r10,r11 + mov r9=0xffff + ;; + andcm r8=r9,r8 + br.ret.sptk.many b0 +END(csum_ipv6_magic) +EXPORT_SYMBOL(csum_ipv6_magic) diff --git a/arch/ia64/lib/memcpy.S b/arch/ia64/lib/memcpy.S new file mode 100644 index 000000000000..35c9069a8345 --- /dev/null +++ b/arch/ia64/lib/memcpy.S @@ -0,0 +1,304 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Optimized version of the standard memcpy() function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * no return value + * + * Copyright (C) 2000-2001 Hewlett-Packard Co + * Stephane Eranian + * David Mosberger-Tang + */ +#include +#include + +GLOBAL_ENTRY(memcpy) + +# define MEM_LAT 21 /* latency to memory */ + +# define dst r2 +# define src r3 +# define retval r8 +# define saved_pfs r9 +# define saved_lc r10 +# define saved_pr r11 +# define cnt r16 +# define src2 r17 +# define t0 r18 +# define t1 r19 +# define t2 r20 +# define t3 r21 +# define t4 r22 +# define src_end r23 + +# define N (MEM_LAT + 4) +# define Nrot ((N + 7) & ~7) + + /* + * First, check if everything (src, dst, len) is a multiple of eight. If + * so, we handle everything with no taken branches (other than the loop + * itself) and a small icache footprint. Otherwise, we jump off to + * the more general copy routine handling arbitrary + * sizes/alignment etc. + */ + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot,0,Nrot + .save ar.lc, saved_lc + mov saved_lc=ar.lc + or t0=in0,in1 + ;; + + or t0=t0,in2 + .save pr, saved_pr + mov saved_pr=pr + + .body + + cmp.eq p6,p0=in2,r0 // zero length? + mov retval=in0 // return dst +(p6) br.ret.spnt.many rp // zero length, return immediately + ;; + + mov dst=in0 // copy because of rotation + shr.u cnt=in2,3 // number of 8-byte words to copy + mov pr.rot=1<<16 + ;; + + adds cnt=-1,cnt // br.ctop is repeat/until + cmp.gtu p7,p0=16,in2 // copying less than 16 bytes? + mov ar.ec=N + ;; + + and t0=0x7,t0 + mov ar.lc=cnt + ;; + cmp.ne p6,p0=t0,r0 + + mov src=in1 // copy because of rotation +(p7) br.cond.spnt.few .memcpy_short +(p6) br.cond.spnt.few .memcpy_long + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + .rotr val[N] + .rotp p[N] + .align 32 +1: { .mib +(p[0]) ld8 val[0]=[src],8 + nop.i 0 + brp.loop.imp 1b, 2f +} +2: { .mfb +(p[N-1])st8 [dst]=val[N-1],8 + nop.f 0 + br.ctop.dptk.few 1b +} + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Small (<16 bytes) unaligned copying is done via a simple byte-at-the-time + * copy loop. This performs relatively poorly on Itanium, but it doesn't + * get used very often (gcc inlines small copies) and due to atomicity + * issues, we want to avoid read-modify-write of entire words. + */ + .align 32 +.memcpy_short: + adds cnt=-1,in2 // br.ctop is repeat/until + mov ar.ec=MEM_LAT + brp.loop.imp 1f, 2f + ;; + mov ar.lc=cnt + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; + nop.m 0 + ;; + /* + * It is faster to put a stop bit in the loop here because it makes + * the pipeline shorter (and latency is what matters on short copies). + */ + .align 32 +1: { .mib +(p[0]) ld1 val[0]=[src],1 + nop.i 0 + brp.loop.imp 1b, 2f +} ;; +2: { .mfb +(p[MEM_LAT-1])st1 [dst]=val[MEM_LAT-1],1 + nop.f 0 + br.ctop.dptk.few 1b +} ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov ar.pfs=saved_pfs + br.ret.sptk.many rp + + /* + * Large (>= 16 bytes) copying is done in a fancy way. Latency isn't + * an overriding concern here, but throughput is. We first do + * sub-word copying until the destination is aligned, then we check + * if the source is also aligned. If so, we do a simple load/store-loop + * until there are less than 8 bytes left over and then we do the tail, + * by storing the last few bytes using sub-word copying. If the source + * is not aligned, we branch off to the non-congruent loop. + * + * stage: op: + * 0 ld + * : + * MEM_LAT+3 shrp + * MEM_LAT+4 st + * + * On Itanium, the pipeline itself runs without stalls. However, br.ctop + * seems to introduce an unavoidable bubble in the pipeline so the overall + * latency is 2 cycles/iteration. This gives us a _copy_ throughput + * of 4 byte/cycle. Still not bad. + */ +# undef N +# undef Nrot +# define N (MEM_LAT + 5) /* number of stages */ +# define Nrot ((N+1 + 2 + 7) & ~7) /* number of rotating regs */ + +#define LOG_LOOP_SIZE 6 + +.memcpy_long: + alloc t3=ar.pfs,3,Nrot,0,Nrot // resize register frame + and t0=-8,src // t0 = src & ~7 + and t2=7,src // t2 = src & 7 + ;; + ld8 t0=[t0] // t0 = 1st source word + adds src2=7,src // src2 = (src + 7) + sub t4=r0,dst // t4 = -dst + ;; + and src2=-8,src2 // src2 = (src + 7) & ~7 + shl t2=t2,3 // t2 = 8*(src & 7) + shl t4=t4,3 // t4 = 8*(dst & 7) + ;; + ld8 t1=[src2] // t1 = 1st source word if src is 8-byte aligned, 2nd otherwise + sub t3=64,t2 // t3 = 64-8*(src & 7) + shr.u t0=t0,t2 + ;; + add src_end=src,in2 + shl t1=t1,t3 + mov pr=t4,0x38 // (p5,p4,p3)=(dst & 7) + ;; + or t0=t0,t1 + mov cnt=r0 + adds src_end=-1,src_end + ;; +(p3) st1 [dst]=t0,1 +(p3) shr.u t0=t0,8 +(p3) adds cnt=1,cnt + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 +(p4) adds cnt=2,cnt + ;; +(p5) st4 [dst]=t0,4 +(p5) adds cnt=4,cnt + and src_end=-8,src_end // src_end = last word of source buffer + ;; + + // At this point, dst is aligned to 8 bytes and there at least 16-7=9 bytes left to copy: + +1:{ add src=cnt,src // make src point to remainder of source buffer + sub cnt=in2,cnt // cnt = number of bytes left to copy + mov t4=ip + } ;; + and src2=-8,src // align source pointer + adds t4=.memcpy_loops-1b,t4 + mov ar.ec=N + + and t0=7,src // t0 = src & 7 + shr.u t2=cnt,3 // t2 = number of 8-byte words left to copy + shl cnt=cnt,3 // move bits 0-2 to 3-5 + ;; + + .rotr val[N+1], w[2] + .rotp p[N] + + cmp.ne p6,p0=t0,r0 // is src aligned, too? + shl t0=t0,LOG_LOOP_SIZE // t0 = 8*(src & 7) + adds t2=-1,t2 // br.ctop is repeat/until + ;; + add t4=t0,t4 + mov pr=cnt,0x38 // set (p5,p4,p3) to # of bytes last-word bytes to copy + mov ar.lc=t2 + ;; + nop.m 0 + ;; + nop.m 0 + nop.i 0 + ;; + nop.m 0 + ;; +(p6) ld8 val[1]=[src2],8 // prime the pump... + mov b6=t4 + br.sptk.few b6 + ;; + +.memcpy_tail: + // At this point, (p5,p4,p3) are set to the number of bytes left to copy (which is + // less than 8) and t0 contains the last few bytes of the src buffer: +(p5) st4 [dst]=t0,4 +(p5) shr.u t0=t0,32 + mov ar.lc=saved_lc + ;; +(p4) st2 [dst]=t0,2 +(p4) shr.u t0=t0,16 + mov ar.pfs=saved_pfs + ;; +(p3) st1 [dst]=t0 + mov pr=saved_pr,-1 + br.ret.sptk.many rp + +/////////////////////////////////////////////////////// + .align 64 + +#define COPY(shift,index) \ + 1: { .mib \ + (p[0]) ld8 val[0]=[src2],8; \ + (p[MEM_LAT+3]) shrp w[0]=val[MEM_LAT+3],val[MEM_LAT+4-index],shift; \ + brp.loop.imp 1b, 2f \ + }; \ + 2: { .mfb \ + (p[MEM_LAT+4]) st8 [dst]=w[1],8; \ + nop.f 0; \ + br.ctop.dptk.few 1b; \ + }; \ + ;; \ + ld8 val[N-1]=[src_end]; /* load last word (may be same as val[N]) */ \ + ;; \ + shrp t0=val[N-1],val[N-index],shift; \ + br .memcpy_tail +.memcpy_loops: + COPY(0, 1) /* no point special casing this---it doesn't go any faster without shrp */ + COPY(8, 0) + COPY(16, 0) + COPY(24, 0) + COPY(32, 0) + COPY(40, 0) + COPY(48, 0) + COPY(56, 0) + +END(memcpy) +EXPORT_SYMBOL(memcpy) diff --git a/arch/ia64/lib/memcpy_mck.S b/arch/ia64/lib/memcpy_mck.S new file mode 100644 index 000000000000..c0d4362217ae --- /dev/null +++ b/arch/ia64/lib/memcpy_mck.S @@ -0,0 +1,659 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Itanium 2-optimized version of memcpy and copy_user function + * + * Inputs: + * in0: destination address + * in1: source address + * in2: number of bytes to copy + * Output: + * for memcpy: return dest + * for copy_user: return 0 if success, + * or number of byte NOT copied if error occurred. + * + * Copyright (C) 2002 Intel Corp. + * Copyright (C) 2002 Ken Chen + */ +#include +#include +#include + +#define EK(y...) EX(y) + +/* McKinley specific optimization */ + +#define retval r8 +#define saved_pfs r31 +#define saved_lc r10 +#define saved_pr r11 +#define saved_in0 r14 +#define saved_in1 r15 +#define saved_in2 r16 + +#define src0 r2 +#define src1 r3 +#define dst0 r17 +#define dst1 r18 +#define cnt r9 + +/* r19-r30 are temp for each code section */ +#define PREFETCH_DIST 8 +#define src_pre_mem r19 +#define dst_pre_mem r20 +#define src_pre_l2 r21 +#define dst_pre_l2 r22 +#define t1 r23 +#define t2 r24 +#define t3 r25 +#define t4 r26 +#define t5 t1 // alias! +#define t6 t2 // alias! +#define t7 t3 // alias! +#define n8 r27 +#define t9 t5 // alias! +#define t10 t4 // alias! +#define t11 t7 // alias! +#define t12 t6 // alias! +#define t14 t10 // alias! +#define t13 r28 +#define t15 r29 +#define tmp r30 + +/* defines for long_copy block */ +#define A 0 +#define B (PREFETCH_DIST) +#define C (B + PREFETCH_DIST) +#define D (C + 1) +#define N (D + 1) +#define Nrot ((N + 7) & ~7) + +/* alias */ +#define in0 r32 +#define in1 r33 +#define in2 r34 + +GLOBAL_ENTRY(memcpy) + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f0 + mov retval=in0 + br.cond.sptk .common_code + ;; +END(memcpy) +EXPORT_SYMBOL(memcpy) +GLOBAL_ENTRY(__copy_user) + .prologue +// check dest alignment + and r28=0x7,in0 + and r29=0x7,in1 + mov f6=f1 + mov saved_in0=in0 // save dest pointer + mov saved_in1=in1 // save src pointer + mov retval=r0 // initialize return value + ;; +.common_code: + cmp.gt p15,p0=8,in2 // check for small size + cmp.ne p13,p0=0,r28 // check dest alignment + cmp.ne p14,p0=0,r29 // check src alignment + add src0=0,in1 + sub r30=8,r28 // for .align_dest + mov saved_in2=in2 // save len + ;; + add dst0=0,in0 + add dst1=1,in0 // dest odd index + cmp.le p6,p0 = 1,r30 // for .align_dest +(p15) br.cond.dpnt .memcpy_short +(p13) br.cond.dpnt .align_dest +(p14) br.cond.dpnt .unaligned_src + ;; + +// both dest and src are aligned on 8-byte boundary +.aligned_src: + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,Nrot-3,0,Nrot + .save pr, saved_pr + mov saved_pr=pr + + shr.u cnt=in2,7 // this much cache line + ;; + cmp.lt p6,p0=2*PREFETCH_DIST,cnt + cmp.lt p7,p8=1,cnt + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .body + add cnt=-1,cnt + add src_pre_mem=0,in1 // prefetch src pointer + add dst_pre_mem=0,in0 // prefetch dest pointer + ;; +(p7) mov ar.lc=cnt // prefetch count +(p8) mov ar.lc=r0 +(p6) br.cond.dpnt .long_copy + ;; + +.prefetch: + lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few .prefetch + ;; + +.medium_copy: + and tmp=31,in2 // copy length after iteration + shr.u r29=in2,5 // number of 32-byte iteration + add dst1=8,dst0 // 2nd dest pointer + ;; + add cnt=-1,r29 // ctop iteration adjustment + cmp.eq p10,p0=r29,r0 // do we really need to loop? + add src1=8,src0 // 2nd src pointer + cmp.le p6,p0=8,tmp + ;; + cmp.le p7,p0=16,tmp + mov ar.lc=cnt // loop setup + cmp.eq p16,p17 = r0,r0 + mov ar.ec=2 +(p10) br.dpnt.few .aligned_src_tail + ;; + TEXT_ALIGN(32) +1: +EX(.ex_handler, (p16) ld8 r34=[src0],16) +EK(.ex_handler, (p16) ld8 r38=[src1],16) +EX(.ex_handler, (p17) st8 [dst0]=r33,16) +EK(.ex_handler, (p17) st8 [dst1]=r37,16) + ;; +EX(.ex_handler, (p16) ld8 r32=[src0],16) +EK(.ex_handler, (p16) ld8 r36=[src1],16) +EX(.ex_handler, (p16) st8 [dst0]=r34,16) +EK(.ex_handler, (p16) st8 [dst1]=r38,16) + br.ctop.dptk.few 1b + ;; + +.aligned_src_tail: +EX(.ex_handler, (p6) ld8 t1=[src0]) + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs +EX(.ex_hndlr_s, (p7) ld8 t2=[src1],8) + cmp.le p8,p0=24,tmp + and r21=-8,tmp + ;; +EX(.ex_hndlr_s, (p8) ld8 t3=[src1]) +EX(.ex_handler, (p6) st8 [dst0]=t1) // store byte 1 + and in2=7,tmp // remaining length +EX(.ex_hndlr_d, (p7) st8 [dst1]=t2,8) // store byte 2 + add src0=src0,r21 // setting up src pointer + add dst0=dst0,r21 // setting up dest pointer + ;; +EX(.ex_handler, (p8) st8 [dst1]=t3) // store byte 3 + mov pr=saved_pr,-1 + br.dptk.many .memcpy_short + ;; + +/* code taken from copy_page_mck */ +.long_copy: + .rotr v[2*PREFETCH_DIST] + .rotp p[N] + + mov src_pre_mem = src0 + mov pr.rot = 0x10000 + mov ar.ec = 1 // special unrolled loop + + mov dst_pre_mem = dst0 + + add src_pre_l2 = 8*8, src0 + add dst_pre_l2 = 8*8, dst0 + ;; + add src0 = 8, src_pre_mem // first t1 src + mov ar.lc = 2*PREFETCH_DIST - 1 + shr.u cnt=in2,7 // number of lines + add src1 = 3*8, src_pre_mem // first t3 src + add dst0 = 8, dst_pre_mem // first t1 dst + add dst1 = 3*8, dst_pre_mem // first t3 dst + ;; + and tmp=127,in2 // remaining bytes after this block + add cnt = -(2*PREFETCH_DIST) - 1, cnt + // same as .line_copy loop, but with all predicated-off instructions removed: +.prefetch_loop: +EX(.ex_hndlr_lcpy_1, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 +EK(.ex_hndlr_lcpy_1, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 + br.ctop.sptk .prefetch_loop + ;; + cmp.eq p16, p0 = r0, r0 // reset p16 to 1 + mov ar.lc = cnt + mov ar.ec = N // # of stages in pipeline + ;; +.line_copy: +EX(.ex_handler, (p[D]) ld8 t2 = [src0], 3*8) // M0 +EK(.ex_handler, (p[D]) ld8 t4 = [src1], 3*8) // M1 +EX(.ex_handler_lcpy, (p[B]) st8 [dst_pre_mem] = v[B], 128) // M2 prefetch dst from memory +EK(.ex_handler_lcpy, (p[D]) st8 [dst_pre_l2] = n8, 128) // M3 prefetch dst from L2 + ;; +EX(.ex_handler_lcpy, (p[A]) ld8 v[A] = [src_pre_mem], 128) // M0 prefetch src from memory +EK(.ex_handler_lcpy, (p[C]) ld8 n8 = [src_pre_l2], 128) // M1 prefetch src from L2 +EX(.ex_handler, (p[D]) st8 [dst0] = t1, 8) // M2 +EK(.ex_handler, (p[D]) st8 [dst1] = t3, 8) // M3 + ;; +EX(.ex_handler, (p[D]) ld8 t5 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t7 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t2, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t4, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t6 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t10 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t5, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t7, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t9 = [src0], 3*8) +EK(.ex_handler, (p[D]) ld8 t11 = [src1], 3*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t6, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t10, 8) + ;; +EX(.ex_handler, (p[D]) ld8 t12 = [src0], 8) +EK(.ex_handler, (p[D]) ld8 t14 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t9, 3*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t11, 3*8) + ;; +EX(.ex_handler, (p[D]) ld8 t13 = [src0], 4*8) +EK(.ex_handler, (p[D]) ld8 t15 = [src1], 4*8) +EX(.ex_handler, (p[D]) st8 [dst0] = t12, 8) +EK(.ex_handler, (p[D]) st8 [dst1] = t14, 8) + ;; +EX(.ex_handler, (p[C]) ld8 t1 = [src0], 8) +EK(.ex_handler, (p[C]) ld8 t3 = [src1], 8) +EX(.ex_handler, (p[D]) st8 [dst0] = t13, 4*8) +EK(.ex_handler, (p[D]) st8 [dst1] = t15, 4*8) + br.ctop.sptk .line_copy + ;; + + add dst0=-8,dst0 + add src0=-8,src0 + mov in2=tmp + .restore sp + br.sptk.many .medium_copy + ;; + +#define BLOCK_SIZE 128*32 +#define blocksize r23 +#define curlen r24 + +// dest is on 8-byte boundary, src is not. We need to do +// ld8-ld8, shrp, then st8. Max 8 byte copy per cycle. +.unaligned_src: + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,3,5,0,8 + .save ar.lc, saved_lc + mov saved_lc=ar.lc + .save pr, saved_pr + mov saved_pr=pr + .body +.4k_block: + mov saved_in0=dst0 // need to save all input arguments + mov saved_in2=in2 + mov blocksize=BLOCK_SIZE + ;; + cmp.lt p6,p7=blocksize,in2 + mov saved_in1=src0 + ;; +(p6) mov in2=blocksize + ;; + shr.u r21=in2,7 // this much cache line + shr.u r22=in2,4 // number of 16-byte iteration + and curlen=15,in2 // copy length after iteration + and r30=7,src0 // source alignment + ;; + cmp.lt p7,p8=1,r21 + add cnt=-1,r21 + ;; + + add src_pre_mem=0,src0 // prefetch src pointer + add dst_pre_mem=0,dst0 // prefetch dest pointer + and src0=-8,src0 // 1st src pointer +(p7) mov ar.lc = cnt +(p8) mov ar.lc = r0 + ;; + TEXT_ALIGN(32) +1: lfetch.fault [src_pre_mem], 128 + lfetch.fault.excl [dst_pre_mem], 128 + br.cloop.dptk.few 1b + ;; + + shladd dst1=r22,3,dst0 // 2nd dest pointer + shladd src1=r22,3,src0 // 2nd src pointer + cmp.eq p8,p9=r22,r0 // do we really need to loop? + cmp.le p6,p7=8,curlen; // have at least 8 byte remaining? + add cnt=-1,r22 // ctop iteration adjustment + ;; +EX(.ex_handler, (p9) ld8 r33=[src0],8) // loop primer +EK(.ex_handler, (p9) ld8 r37=[src1],8) +(p8) br.dpnt.few .noloop + ;; + +// The jump address is calculated based on src alignment. The COPYU +// macro below need to confine its size to power of two, so an entry +// can be caulated using shl instead of an expensive multiply. The +// size is then hard coded by the following #define to match the +// actual size. This make it somewhat tedious when COPYU macro gets +// changed and this need to be adjusted to match. +#define LOOP_SIZE 6 +1: + mov r29=ip // jmp_table thread + mov ar.lc=cnt + ;; + add r29=.jump_table - 1b - (.jmp1-.jump_table), r29 + shl r28=r30, LOOP_SIZE // jmp_table thread + mov ar.ec=2 // loop setup + ;; + add r29=r29,r28 // jmp_table thread + cmp.eq p16,p17=r0,r0 + ;; + mov b6=r29 // jmp_table thread + ;; + br.cond.sptk.few b6 + +// for 8-15 byte case +// We will skip the loop, but need to replicate the side effect +// that the loop produces. +.noloop: +EX(.ex_handler, (p6) ld8 r37=[src1],8) + add src0=8,src0 +(p6) shl r25=r30,3 + ;; +EX(.ex_handler, (p6) ld8 r27=[src1]) +(p6) shr.u r28=r37,r25 +(p6) sub r26=64,r25 + ;; +(p6) shl r27=r27,r26 + ;; +(p6) or r21=r28,r27 + +.unaligned_src_tail: +/* check if we have more than blocksize to copy, if so go back */ + cmp.gt p8,p0=saved_in2,blocksize + ;; +(p8) add dst0=saved_in0,blocksize +(p8) add src0=saved_in1,blocksize +(p8) sub in2=saved_in2,blocksize +(p8) br.dpnt .4k_block + ;; + +/* we have up to 15 byte to copy in the tail. + * part of work is already done in the jump table code + * we are at the following state. + * src side: + * + * xxxxxx xx <----- r21 has xxxxxxxx already + * -------- -------- -------- + * 0 8 16 + * ^ + * | + * src1 + * + * dst + * -------- -------- -------- + * ^ + * | + * dst1 + */ +EX(.ex_handler, (p6) st8 [dst1]=r21,8) // more than 8 byte to copy +(p6) add curlen=-8,curlen // update length + mov ar.pfs=saved_pfs + ;; + mov ar.lc=saved_lc + mov pr=saved_pr,-1 + mov in2=curlen // remaining length + mov dst0=dst1 // dest pointer + add src0=src1,r30 // forward by src alignment + ;; + +// 7 byte or smaller. +.memcpy_short: + cmp.le p8,p9 = 1,in2 + cmp.le p10,p11 = 2,in2 + cmp.le p12,p13 = 3,in2 + cmp.le p14,p15 = 4,in2 + add src1=1,src0 // second src pointer + add dst1=1,dst0 // second dest pointer + ;; + +EX(.ex_handler_short, (p8) ld1 t1=[src0],2) +EK(.ex_handler_short, (p10) ld1 t2=[src1],2) +(p9) br.ret.dpnt rp // 0 byte copy + ;; + +EX(.ex_handler_short, (p8) st1 [dst0]=t1,2) +EK(.ex_handler_short, (p10) st1 [dst1]=t2,2) +(p11) br.ret.dpnt rp // 1 byte copy + +EX(.ex_handler_short, (p12) ld1 t3=[src0],2) +EK(.ex_handler_short, (p14) ld1 t4=[src1],2) +(p13) br.ret.dpnt rp // 2 byte copy + ;; + + cmp.le p6,p7 = 5,in2 + cmp.le p8,p9 = 6,in2 + cmp.le p10,p11 = 7,in2 + +EX(.ex_handler_short, (p12) st1 [dst0]=t3,2) +EK(.ex_handler_short, (p14) st1 [dst1]=t4,2) +(p15) br.ret.dpnt rp // 3 byte copy + ;; + +EX(.ex_handler_short, (p6) ld1 t5=[src0],2) +EK(.ex_handler_short, (p8) ld1 t6=[src1],2) +(p7) br.ret.dpnt rp // 4 byte copy + ;; + +EX(.ex_handler_short, (p6) st1 [dst0]=t5,2) +EK(.ex_handler_short, (p8) st1 [dst1]=t6,2) +(p9) br.ret.dptk rp // 5 byte copy + +EX(.ex_handler_short, (p10) ld1 t7=[src0],2) +(p11) br.ret.dptk rp // 6 byte copy + ;; + +EX(.ex_handler_short, (p10) st1 [dst0]=t7,2) + br.ret.dptk rp // done all cases + + +/* Align dest to nearest 8-byte boundary. We know we have at + * least 7 bytes to copy, enough to crawl to 8-byte boundary. + * Actual number of byte to crawl depend on the dest alignment. + * 7 byte or less is taken care at .memcpy_short + + * src0 - source even index + * src1 - source odd index + * dst0 - dest even index + * dst1 - dest odd index + * r30 - distance to 8-byte boundary + */ + +.align_dest: + add src1=1,in1 // source odd index + cmp.le p7,p0 = 2,r30 // for .align_dest + cmp.le p8,p0 = 3,r30 // for .align_dest +EX(.ex_handler_short, (p6) ld1 t1=[src0],2) + cmp.le p9,p0 = 4,r30 // for .align_dest + cmp.le p10,p0 = 5,r30 + ;; +EX(.ex_handler_short, (p7) ld1 t2=[src1],2) +EK(.ex_handler_short, (p8) ld1 t3=[src0],2) + cmp.le p11,p0 = 6,r30 +EX(.ex_handler_short, (p6) st1 [dst0] = t1,2) + cmp.le p12,p0 = 7,r30 + ;; +EX(.ex_handler_short, (p9) ld1 t4=[src1],2) +EK(.ex_handler_short, (p10) ld1 t5=[src0],2) +EX(.ex_handler_short, (p7) st1 [dst1] = t2,2) +EK(.ex_handler_short, (p8) st1 [dst0] = t3,2) + ;; +EX(.ex_handler_short, (p11) ld1 t6=[src1],2) +EK(.ex_handler_short, (p12) ld1 t7=[src0],2) + cmp.eq p6,p7=r28,r29 +EX(.ex_handler_short, (p9) st1 [dst1] = t4,2) +EK(.ex_handler_short, (p10) st1 [dst0] = t5,2) + sub in2=in2,r30 + ;; +EX(.ex_handler_short, (p11) st1 [dst1] = t6,2) +EK(.ex_handler_short, (p12) st1 [dst0] = t7) + add dst0=in0,r30 // setup arguments + add src0=in1,r30 +(p6) br.cond.dptk .aligned_src +(p7) br.cond.dpnt .unaligned_src + ;; + +/* main loop body in jump table format */ +#define COPYU(shift) \ +1: \ +EX(.ex_handler, (p16) ld8 r32=[src0],8); /* 1 */ \ +EK(.ex_handler, (p16) ld8 r36=[src1],8); \ + (p17) shrp r35=r33,r34,shift;; /* 1 */ \ +EX(.ex_handler, (p6) ld8 r22=[src1]); /* common, prime for tail section */ \ + nop.m 0; \ + (p16) shrp r38=r36,r37,shift; \ +EX(.ex_handler, (p17) st8 [dst0]=r35,8); /* 1 */ \ +EK(.ex_handler, (p17) st8 [dst1]=r39,8); \ + br.ctop.dptk.few 1b;; \ + (p7) add src1=-8,src1; /* back out for <8 byte case */ \ + shrp r21=r22,r38,shift; /* speculative work */ \ + br.sptk.few .unaligned_src_tail /* branch out of jump table */ \ + ;; + TEXT_ALIGN(32) +.jump_table: + COPYU(8) // unaligned cases +.jmp1: + COPYU(16) + COPYU(24) + COPYU(32) + COPYU(40) + COPYU(48) + COPYU(56) + +#undef A +#undef B +#undef C +#undef D + +/* + * Due to lack of local tag support in gcc 2.x assembler, it is not clear which + * instruction failed in the bundle. The exception algorithm is that we + * first figure out the faulting address, then detect if there is any + * progress made on the copy, if so, redo the copy from last known copied + * location up to the faulting address (exclusive). In the copy_from_user + * case, remaining byte in kernel buffer will be zeroed. + * + * Take copy_from_user as an example, in the code there are multiple loads + * in a bundle and those multiple loads could span over two pages, the + * faulting address is calculated as page_round_down(max(src0, src1)). + * This is based on knowledge that if we can access one byte in a page, we + * can access any byte in that page. + * + * predicate used in the exception handler: + * p6-p7: direction + * p10-p11: src faulting addr calculation + * p12-p13: dst faulting addr calculation + */ + +#define A r19 +#define B r20 +#define C r21 +#define D r22 +#define F r28 + +#define saved_retval loc0 +#define saved_rtlink loc1 +#define saved_pfs_stack loc2 + +.ex_hndlr_s: + add src0=8,src0 + br.sptk .ex_handler + ;; +.ex_hndlr_d: + add dst0=8,dst0 + br.sptk .ex_handler + ;; +.ex_hndlr_lcpy_1: + mov src1=src_pre_mem + mov dst1=dst_pre_mem + cmp.gtu p10,p11=src_pre_mem,saved_in1 + cmp.gtu p12,p13=dst_pre_mem,saved_in0 + ;; +(p10) add src0=8,saved_in1 +(p11) mov src0=saved_in1 +(p12) add dst0=8,saved_in0 +(p13) mov dst0=saved_in0 + br.sptk .ex_handler +.ex_handler_lcpy: + // in line_copy block, the preload addresses should always ahead + // of the other two src/dst pointers. Furthermore, src1/dst1 should + // always ahead of src0/dst0. + mov src1=src_pre_mem + mov dst1=dst_pre_mem +.ex_handler: + mov pr=saved_pr,-1 // first restore pr, lc, and pfs + mov ar.lc=saved_lc + mov ar.pfs=saved_pfs + ;; +.ex_handler_short: // fault occurred in these sections didn't change pr, lc, pfs + cmp.ltu p6,p7=saved_in0, saved_in1 // get the copy direction + cmp.ltu p10,p11=src0,src1 + cmp.ltu p12,p13=dst0,dst1 + fcmp.eq p8,p0=f6,f0 // is it memcpy? + mov tmp = dst0 + ;; +(p11) mov src1 = src0 // pick the larger of the two +(p13) mov dst0 = dst1 // make dst0 the smaller one +(p13) mov dst1 = tmp // and dst1 the larger one + ;; +(p6) dep F = r0,dst1,0,PAGE_SHIFT // usr dst round down to page boundary +(p7) dep F = r0,src1,0,PAGE_SHIFT // usr src round down to page boundary + ;; +(p6) cmp.le p14,p0=dst0,saved_in0 // no progress has been made on store +(p7) cmp.le p14,p0=src0,saved_in1 // no progress has been made on load + mov retval=saved_in2 +(p8) ld1 tmp=[src1] // force an oops for memcpy call +(p8) st1 [dst1]=r0 // force an oops for memcpy call +(p14) br.ret.sptk.many rp + +/* + * The remaining byte to copy is calculated as: + * + * A = (faulting_addr - orig_src) -> len to faulting ld address + * or + * (faulting_addr - orig_dst) -> len to faulting st address + * B = (cur_dst - orig_dst) -> len copied so far + * C = A - B -> len need to be copied + * D = orig_len - A -> len need to be left along + */ +(p6) sub A = F, saved_in0 +(p7) sub A = F, saved_in1 + clrrrb + ;; + alloc saved_pfs_stack=ar.pfs,3,3,3,0 + cmp.lt p8,p0=A,r0 + sub B = dst0, saved_in0 // how many byte copied so far + ;; +(p8) mov A = 0; // A shouldn't be negative, cap it + ;; + sub C = A, B + sub D = saved_in2, A + ;; + cmp.gt p8,p0=C,r0 // more than 1 byte? + mov r8=0 + mov saved_retval = D + mov saved_rtlink = b0 + + add out0=saved_in0, B + add out1=saved_in1, B + mov out2=C +(p8) br.call.sptk.few b0=__copy_user // recursive call + ;; + + add saved_retval=saved_retval,r8 // above might return non-zero value + ;; + + mov retval=saved_retval + mov ar.pfs=saved_pfs_stack + mov b0=saved_rtlink + br.ret.sptk.many rp + +/* end of McKinley specific optimization */ +END(__copy_user) +EXPORT_SYMBOL(__copy_user) diff --git a/arch/ia64/lib/memset.S b/arch/ia64/lib/memset.S new file mode 100644 index 000000000000..552c5c7e4d06 --- /dev/null +++ b/arch/ia64/lib/memset.S @@ -0,0 +1,365 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Optimized version of the standard memset() function. + + Copyright (c) 2002 Hewlett-Packard Co/CERN + Sverre Jarp + + Return: dest + + Inputs: + in0: dest + in1: value + in2: count + + The algorithm is fairly straightforward: set byte by byte until we + we get to a 16B-aligned address, then loop on 128 B chunks using an + early store as prefetching, then loop on 32B chucks, then clear remaining + words, finally clear remaining bytes. + Since a stf.spill f0 can store 16B in one go, we use this instruction + to get peak speed when value = 0. */ + +#include +#include +#undef ret + +#define dest in0 +#define value in1 +#define cnt in2 + +#define tmp r31 +#define save_lc r30 +#define ptr0 r29 +#define ptr1 r28 +#define ptr2 r27 +#define ptr3 r26 +#define ptr9 r24 +#define loopcnt r23 +#define linecnt r22 +#define bytecnt r21 + +#define fvalue f6 + +// This routine uses only scratch predicate registers (p6 - p15) +#define p_scr p6 // default register for same-cycle branches +#define p_nz p7 +#define p_zr p8 +#define p_unalgn p9 +#define p_y p11 +#define p_n p12 +#define p_yy p13 +#define p_nn p14 + +#define MIN1 15 +#define MIN1P1HALF 8 +#define LINE_SIZE 128 +#define LSIZE_SH 7 // shift amount +#define PREF_AHEAD 8 + +GLOBAL_ENTRY(memset) +{ .mmi + .prologue + alloc tmp = ar.pfs, 3, 0, 0, 0 + lfetch.nt1 [dest] // + .save ar.lc, save_lc + mov.i save_lc = ar.lc + .body +} { .mmi + mov ret0 = dest // return value + cmp.ne p_nz, p_zr = value, r0 // use stf.spill if value is zero + cmp.eq p_scr, p0 = cnt, r0 +;; } +{ .mmi + and ptr2 = -(MIN1+1), dest // aligned address + and tmp = MIN1, dest // prepare to check for correct alignment + tbit.nz p_y, p_n = dest, 0 // Do we have an odd address? (M_B_U) +} { .mib + mov ptr1 = dest + mux1 value = value, @brcst // create 8 identical bytes in word +(p_scr) br.ret.dpnt.many rp // return immediately if count = 0 +;; } +{ .mib + cmp.ne p_unalgn, p0 = tmp, r0 // +} { .mib + sub bytecnt = (MIN1+1), tmp // NB: # of bytes to move is 1 higher than loopcnt + cmp.gt p_scr, p0 = 16, cnt // is it a minimalistic task? +(p_scr) br.cond.dptk.many .move_bytes_unaligned // go move just a few (M_B_U) +;; } +{ .mmi +(p_unalgn) add ptr1 = (MIN1+1), ptr2 // after alignment +(p_unalgn) add ptr2 = MIN1P1HALF, ptr2 // after alignment +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 3 // should we do a st8 ? +;; } +{ .mib +(p_y) add cnt = -8, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 2 // should we do a st4 ? +} { .mib +(p_y) st8 [ptr2] = value,-4 // +(p_n) add ptr2 = 4, ptr2 // +;; } +{ .mib +(p_yy) add cnt = -4, cnt // +(p_unalgn) tbit.nz.unc p_y, p_n = bytecnt, 1 // should we do a st2 ? +} { .mib +(p_yy) st4 [ptr2] = value,-2 // +(p_nn) add ptr2 = 2, ptr2 // +;; } +{ .mmi + mov tmp = LINE_SIZE+1 // for compare +(p_y) add cnt = -2, cnt // +(p_unalgn) tbit.nz.unc p_yy, p_nn = bytecnt, 0 // should we do a st1 ? +} { .mmi + setf.sig fvalue=value // transfer value to FLP side +(p_y) st2 [ptr2] = value,-1 // +(p_n) add ptr2 = 1, ptr2 // +;; } + +{ .mmi +(p_yy) st1 [ptr2] = value // + cmp.gt p_scr, p0 = tmp, cnt // is it a minimalistic task? +} { .mbb +(p_yy) add cnt = -1, cnt // +(p_scr) br.cond.dpnt.many .fraction_of_line // go move just a few +;; } + +{ .mib + nop.m 0 + shr.u linecnt = cnt, LSIZE_SH +(p_zr) br.cond.dptk.many .l1b // Jump to use stf.spill +;; } + + TEXT_ALIGN(32) // --------------------- // L1A: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt // + add ptr2 = 8, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt // +;; } +.pref_l1a: +{ .mib + stf8 [ptr9] = fvalue, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1a +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp // +;; } +.l1ax: + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 8 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 24 + stf8 [ptr0] = fvalue, 24 + ;; } + { .mmi + stf8 [ptr2] = fvalue, 8 + stf8 [ptr0] = fvalue, 32 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf8 [ptr2] = fvalue, 24 +(p_scr) stf8 [ptr9] = fvalue, 128 + br.cloop.dptk.few .l1ax +;; } +{ .mbb + cmp.le p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .fraction_of_line // Branch no. 2 + br.cond.dpnt.many .move_bytes_from_alignment // Branch no. 3 +;; } + + TEXT_ALIGN(32) +.l1b: // ------------------------------------ // L1B: store ahead into cache lines; fill later +{ .mmi + and tmp = -(LINE_SIZE), cnt // compute end of range + mov ptr9 = ptr1 // used for prefetching + and cnt = (LINE_SIZE-1), cnt // remainder +} { .mmi + mov loopcnt = PREF_AHEAD-1 // default prefetch loop + cmp.gt p_scr, p0 = PREF_AHEAD, linecnt // check against actual value +;; } +{ .mmi +(p_scr) add loopcnt = -1, linecnt + add ptr2 = 16, ptr1 // start of stores (beyond prefetch stores) + add ptr1 = tmp, ptr1 // first address beyond total range +;; } +{ .mmi + add tmp = -1, linecnt // next loop count + mov.i ar.lc = loopcnt +;; } +.pref_l1b: +{ .mib + stf.spill [ptr9] = f0, 128 // Do stores one cache line apart + nop.i 0 + br.cloop.dptk.few .pref_l1b +;; } +{ .mmi + add ptr0 = 16, ptr2 // Two stores in parallel + mov.i ar.lc = tmp +;; } +.l1bx: + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 32 + ;; } + { .mmi + stf.spill [ptr2] = f0, 32 + stf.spill [ptr0] = f0, 64 + cmp.lt p_scr, p0 = ptr9, ptr1 // do we need more prefetching? + ;; } +{ .mmb + stf.spill [ptr2] = f0, 32 +(p_scr) stf.spill [ptr9] = f0, 128 + br.cloop.dptk.few .l1bx +;; } +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // +;; } + +.fraction_of_line: +{ .mib + add ptr2 = 16, ptr1 + shr.u loopcnt = cnt, 5 // loopcnt = cnt / 32 +;; } +{ .mib + cmp.eq p_scr, p0 = loopcnt, r0 + add loopcnt = -1, loopcnt +(p_scr) br.cond.dpnt.many .store_words +;; } +{ .mib + and cnt = 0x1f, cnt // compute the remaining cnt + mov.i ar.lc = loopcnt +;; } + TEXT_ALIGN(32) +.l2: // ------------------------------------ // L2A: store 32B in 2 cycles +{ .mmb + stf8 [ptr1] = fvalue, 8 + stf8 [ptr2] = fvalue, 8 +;; } { .mmb + stf8 [ptr1] = fvalue, 24 + stf8 [ptr2] = fvalue, 24 + br.cloop.dptk.many .l2 +;; } +.store_words: +{ .mib + cmp.gt p_scr, p0 = 8, cnt // just a few bytes left ? +(p_scr) br.cond.dpnt.many .move_bytes_from_alignment // Branch +;; } + +{ .mmi + stf8 [ptr1] = fvalue, 8 // store + cmp.le p_y, p_n = 16, cnt + add cnt = -8, cnt // subtract +;; } +{ .mmi +(p_y) stf8 [ptr1] = fvalue, 8 // store +(p_y) cmp.le.unc p_yy, p_nn = 16, cnt +(p_y) add cnt = -8, cnt // subtract +;; } +{ .mmi // store +(p_yy) stf8 [ptr1] = fvalue, 8 +(p_yy) add cnt = -8, cnt // subtract +;; } + +.move_bytes_from_alignment: +{ .mib + cmp.eq p_scr, p0 = cnt, r0 + tbit.nz.unc p_y, p0 = cnt, 2 // should we terminate with a st4 ? +(p_scr) br.cond.dpnt.few .restore_and_exit +;; } +{ .mib +(p_y) st4 [ptr1] = value,4 + tbit.nz.unc p_yy, p0 = cnt, 1 // should we terminate with a st2 ? +;; } +{ .mib +(p_yy) st2 [ptr1] = value,2 + tbit.nz.unc p_y, p0 = cnt, 0 // should we terminate with a st1 ? +;; } + +{ .mib +(p_y) st1 [ptr1] = value +;; } +.restore_and_exit: +{ .mib + nop.m 0 + mov.i ar.lc = save_lc + br.ret.sptk.many rp +;; } + +.move_bytes_unaligned: +{ .mmi + .pred.rel "mutex",p_y, p_n + .pred.rel "mutex",p_yy, p_nn +(p_n) cmp.le p_yy, p_nn = 4, cnt +(p_y) cmp.le p_yy, p_nn = 5, cnt +(p_n) add ptr2 = 2, ptr1 +} { .mmi +(p_y) add ptr2 = 3, ptr1 +(p_y) st1 [ptr1] = value, 1 // fill 1 (odd-aligned) byte [15, 14 (or less) left] +(p_y) add cnt = -1, cnt +;; } +{ .mmi +(p_yy) cmp.le.unc p_y, p0 = 8, cnt + add ptr3 = ptr1, cnt // prepare last store + mov.i ar.lc = save_lc +} { .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [11, 10 (o less) left] +(p_yy) add cnt = -4, cnt +;; } +{ .mmi +(p_y) cmp.le.unc p_yy, p0 = 8, cnt + add ptr3 = -1, ptr3 // last store + tbit.nz p_scr, p0 = cnt, 1 // will there be a st2 at the end ? +} { .mmi +(p_y) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_y) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [7, 6 (or less) left] +(p_y) add cnt = -4, cnt +;; } +{ .mmi +(p_yy) st2 [ptr1] = value, 4 // fill 2 (aligned) bytes +(p_yy) st2 [ptr2] = value, 4 // fill 2 (aligned) bytes [3, 2 (or less) left] + tbit.nz p_y, p0 = cnt, 0 // will there be a st1 at the end ? +} { .mmi +(p_yy) add cnt = -4, cnt +;; } +{ .mmb +(p_scr) st2 [ptr1] = value // fill 2 (aligned) bytes +(p_y) st1 [ptr3] = value // fill last byte (using ptr3) + br.ret.sptk.many rp +} +END(memset) +EXPORT_SYMBOL(memset) diff --git a/arch/ia64/lib/strlen.S b/arch/ia64/lib/strlen.S new file mode 100644 index 000000000000..1f4a46c15127 --- /dev/null +++ b/arch/ia64/lib/strlen.S @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * + * Optimized version of the standard strlen() function + * + * + * Inputs: + * in0 address of string + * + * Outputs: + * ret0 the number of characters in the string (0 if empty string) + * does not count the \0 + * + * Copyright (C) 1999, 2001 Hewlett-Packard Co + * Stephane Eranian + * + * 09/24/99 S.Eranian add speculation recovery code + */ + +#include +#include + +// +// +// This is an enhanced version of the basic strlen. it includes a combination +// of compute zero index (czx), parallel comparisons, speculative loads and +// loop unroll using rotating registers. +// +// General Ideas about the algorithm: +// The goal is to look at the string in chunks of 8 bytes. +// so we need to do a few extra checks at the beginning because the +// string may not be 8-byte aligned. In this case we load the 8byte +// quantity which includes the start of the string and mask the unused +// bytes with 0xff to avoid confusing czx. +// We use speculative loads and software pipelining to hide memory +// latency and do read ahead safely. This way we defer any exception. +// +// Because we don't want the kernel to be relying on particular +// settings of the DCR register, we provide recovery code in case +// speculation fails. The recovery code is going to "redo" the work using +// only normal loads. If we still get a fault then we generate a +// kernel panic. Otherwise we return the strlen as usual. +// +// The fact that speculation may fail can be caused, for instance, by +// the DCR.dm bit being set. In this case TLB misses are deferred, i.e., +// a NaT bit will be set if the translation is not present. The normal +// load, on the other hand, will cause the translation to be inserted +// if the mapping exists. +// +// It should be noted that we execute recovery code only when we need +// to use the data that has been speculatively loaded: we don't execute +// recovery code on pure read ahead data. +// +// Remarks: +// - the cmp r0,r0 is used as a fast way to initialize a predicate +// register to 1. This is required to make sure that we get the parallel +// compare correct. +// +// - we don't use the epilogue counter to exit the loop but we need to set +// it to zero beforehand. +// +// - after the loop we must test for Nat values because neither the +// czx nor cmp instruction raise a NaT consumption fault. We must be +// careful not to look too far for a Nat for which we don't care. +// For instance we don't need to look at a NaT in val2 if the zero byte +// was in val1. +// +// - Clearly performance tuning is required. +// +// +// +#define saved_pfs r11 +#define tmp r10 +#define base r16 +#define orig r17 +#define saved_pr r18 +#define src r19 +#define mask r20 +#define val r21 +#define val1 r22 +#define val2 r23 + +GLOBAL_ENTRY(strlen) + .prologue + .save ar.pfs, saved_pfs + alloc saved_pfs=ar.pfs,11,0,0,8 // rotating must be multiple of 8 + + .rotr v[2], w[2] // declares our 4 aliases + + extr.u tmp=in0,0,3 // tmp=least significant 3 bits + mov orig=in0 // keep trackof initial byte address + dep src=0,in0,0,3 // src=8byte-aligned in0 address + .save pr, saved_pr + mov saved_pr=pr // preserve predicates (rotation) + ;; + + .body + + ld8 v[1]=[src],8 // must not speculate: can fail here + shl tmp=tmp,3 // multiply by 8bits/byte + mov mask=-1 // our mask + ;; + ld8.s w[1]=[src],8 // speculatively load next + cmp.eq p6,p0=r0,r0 // sets p6 to true for cmp.and + sub tmp=64,tmp // how many bits to shift our mask on the right + ;; + shr.u mask=mask,tmp // zero enough bits to hold v[1] valuable part + mov ar.ec=r0 // clear epilogue counter (saved in ar.pfs) + ;; + add base=-16,src // keep track of aligned base + or v[1]=v[1],mask // now we have a safe initial byte pattern + ;; +1: + ld8.s v[0]=[src],8 // speculatively load next + czx1.r val1=v[1] // search 0 byte from right + czx1.r val2=w[1] // search 0 byte from right following 8bytes + ;; + ld8.s w[0]=[src],8 // speculatively load next to next + cmp.eq.and p6,p0=8,val1 // p6 = p6 and val1==8 + cmp.eq.and p6,p0=8,val2 // p6 = p6 and mask==8 +(p6) br.wtop.dptk 1b // loop until p6 == 0 + ;; + // + // We must return try the recovery code iff + // val1_is_nat || (val1==8 && val2_is_nat) + // + // XXX Fixme + // - there must be a better way of doing the test + // + cmp.eq p8,p9=8,val1 // p6 = val1 had zero (disambiguate) + tnat.nz p6,p7=val1 // test NaT on val1 +(p6) br.cond.spnt .recover // jump to recovery if val1 is NaT + ;; + // + // if we come here p7 is true, i.e., initialized for // cmp + // + cmp.eq.and p7,p0=8,val1// val1==8? + tnat.nz.and p7,p0=val2 // test NaT if val2 +(p7) br.cond.spnt .recover // jump to recovery if val2 is NaT + ;; +(p8) mov val1=val2 // the other test got us out of the loop +(p8) adds src=-16,src // correct position when 3 ahead +(p9) adds src=-24,src // correct position when 4 ahead + ;; + sub ret0=src,orig // distance from base + sub tmp=8,val1 // which byte in word + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // adjust + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of normal execution + + // + // Outlined recovery code when speculation failed + // + // This time we don't use speculation and rely on the normal exception + // mechanism. that's why the loop is not as good as the previous one + // because read ahead is not possible + // + // IMPORTANT: + // Please note that in the case of strlen() as opposed to strlen_user() + // we don't use the exception mechanism, as this function is not + // supposed to fail. If that happens it means we have a bug and the + // code will cause of kernel fault. + // + // XXX Fixme + // - today we restart from the beginning of the string instead + // of trying to continue where we left off. + // +.recover: + ld8 val=[base],8 // will fail if unrecoverable fault + ;; + or val=val,mask // remask first bytes + cmp.eq p0,p6=r0,r0 // nullify first ld8 in loop + ;; + // + // ar.ec is still zero here + // +2: +(p6) ld8 val=[base],8 // will fail if unrecoverable fault + ;; + czx1.r val1=val // search 0 byte from right + ;; + cmp.eq p6,p0=8,val1 // val1==8 ? +(p6) br.wtop.dptk 2b // loop until p6 == 0 + ;; // (avoid WAW on p63) + sub ret0=base,orig // distance from base + sub tmp=8,val1 + mov pr=saved_pr,0xffffffffffff0000 + ;; + sub ret0=ret0,tmp // length=now - back -1 + mov ar.pfs=saved_pfs // because of ar.ec, restore no matter what + br.ret.sptk.many rp // end of successful recovery code +END(strlen) +EXPORT_SYMBOL(strlen) diff --git a/arch/ia64/lib/strncpy_from_user.S b/arch/ia64/lib/strncpy_from_user.S new file mode 100644 index 000000000000..a287169bd953 --- /dev/null +++ b/arch/ia64/lib/strncpy_from_user.S @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Just like strncpy() except that if a fault occurs during copying, + * -EFAULT is returned. + * + * Inputs: + * in0: address of destination buffer + * in1: address of string to be copied + * in2: length of buffer in bytes + * Outputs: + * r8: -EFAULT in case of fault or number of bytes copied if no fault + * + * Copyright (C) 1998-2001 Hewlett-Packard Co + * Copyright (C) 1998-2001 David Mosberger-Tang + * + * 00/03/06 D. Mosberger Fixed to return proper return value (bug found by + * by Andreas Schwab ). + */ + +#include +#include + +GLOBAL_ENTRY(__strncpy_from_user) + alloc r2=ar.pfs,3,0,0,0 + mov r8=0 + mov r9=in1 + ;; + add r10=in1,in2 + cmp.eq p6,p0=r0,in2 +(p6) br.ret.spnt.many rp + + // XXX braindead copy loop---this needs to be optimized +.Loop1: + EX(.Lexit, ld1 r8=[in1],1) + ;; + EX(.Lexit, st1 [in0]=r8,1) + cmp.ne p6,p7=r8,r0 + ;; +(p6) cmp.ne.unc p8,p0=in1,r10 +(p8) br.cond.dpnt.few .Loop1 + ;; +(p6) mov r8=in2 // buffer filled up---return buffer length +(p7) sub r8=in1,r9,1 // return string length (excluding NUL character) +[.Lexit:] + br.ret.sptk.many rp +END(__strncpy_from_user) +EXPORT_SYMBOL(__strncpy_from_user) diff --git a/arch/ia64/lib/strnlen_user.S b/arch/ia64/lib/strnlen_user.S new file mode 100644 index 000000000000..a7eb56e840a9 --- /dev/null +++ b/arch/ia64/lib/strnlen_user.S @@ -0,0 +1,48 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Returns 0 if exception before NUL or reaching the supplied limit (N), + * a value greater than N if the string is longer than the limit, else + * strlen. + * + * Inputs: + * in0: address of buffer + * in1: string length limit N + * Outputs: + * r8: 0 in case of fault, strlen(buffer)+1 otherwise + * + * Copyright (C) 1999, 2001 David Mosberger-Tang + */ + +#include +#include + +GLOBAL_ENTRY(__strnlen_user) + .prologue + alloc r2=ar.pfs,2,0,0,0 + .save ar.lc, r16 + mov r16=ar.lc // preserve ar.lc + + .body + + add r3=-1,in1 + ;; + mov ar.lc=r3 + mov r9=0 + ;; + // XXX braindead strlen loop---this needs to be optimized +.Loop1: + EXCLR(.Lexit, ld1 r8=[in0],1) + add r9=1,r9 + ;; + cmp.eq p6,p0=r8,r0 +(p6) br.cond.dpnt .Lexit + br.cloop.dptk.few .Loop1 + + add r9=1,in1 // NUL not found---return N+1 + ;; +.Lexit: + mov r8=r9 + mov ar.lc=r16 // restore ar.lc + br.ret.sptk.many rp +END(__strnlen_user) +EXPORT_SYMBOL(__strnlen_user) diff --git a/arch/ia64/lib/xor.S b/arch/ia64/lib/xor.S new file mode 100644 index 000000000000..6e2a69662c06 --- /dev/null +++ b/arch/ia64/lib/xor.S @@ -0,0 +1,181 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * arch/ia64/lib/xor.S + * + * Optimized RAID-5 checksumming functions for IA-64. + */ + +#include +#include + +GLOBAL_ENTRY(xor_ia64_2) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 3, 0, 13, 16 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov ar.lc = in0 + mov pr.rot = 1 << 16 + ;; + .rotr s1[6+1], s2[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] +(p[6+1])st8.nta [r8] = d[1], 8 + nop.f 0 + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_2) +EXPORT_SYMBOL(xor_ia64_2) + +GLOBAL_ENTRY(xor_ia64_3) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 4, 0, 20, 24 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov r18 = in3 + mov ar.lc = in0 + mov pr.rot = 1 << 16 + ;; + .rotr s1[6+1], s2[6+1], s3[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] + ;; +(p[0]) ld8.nta s3[0] = [r18], 8 +(p[6+1])st8.nta [r8] = d[1], 8 +(p[6]) xor d[0] = d[0], s3[6] + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_3) +EXPORT_SYMBOL(xor_ia64_3) + +GLOBAL_ENTRY(xor_ia64_4) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 5, 0, 27, 32 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov r18 = in3 + mov ar.lc = in0 + mov pr.rot = 1 << 16 + mov r19 = in4 + ;; + .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] +(p[0]) ld8.nta s3[0] = [r18], 8 +(p[0]) ld8.nta s4[0] = [r19], 8 +(p[6]) xor r20 = s3[6], s4[6] + ;; +(p[6+1])st8.nta [r8] = d[1], 8 +(p[6]) xor d[0] = d[0], r20 + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_4) +EXPORT_SYMBOL(xor_ia64_4) + +GLOBAL_ENTRY(xor_ia64_5) + .prologue + .fframe 0 + .save ar.pfs, r31 + alloc r31 = ar.pfs, 6, 0, 34, 40 + .save ar.lc, r30 + mov r30 = ar.lc + .save pr, r29 + mov r29 = pr + ;; + .body + mov r8 = in1 + mov ar.ec = 6 + 2 + shr in0 = in0, 3 + ;; + adds in0 = -1, in0 + mov r16 = in1 + mov r17 = in2 + ;; + mov r18 = in3 + mov ar.lc = in0 + mov pr.rot = 1 << 16 + mov r19 = in4 + mov r20 = in5 + ;; + .rotr s1[6+1], s2[6+1], s3[6+1], s4[6+1], s5[6+1], d[2] + .rotp p[6+2] +0: +(p[0]) ld8.nta s1[0] = [r16], 8 +(p[0]) ld8.nta s2[0] = [r17], 8 +(p[6]) xor d[0] = s1[6], s2[6] +(p[0]) ld8.nta s3[0] = [r18], 8 +(p[0]) ld8.nta s4[0] = [r19], 8 +(p[6]) xor r21 = s3[6], s4[6] + ;; +(p[0]) ld8.nta s5[0] = [r20], 8 +(p[6+1])st8.nta [r8] = d[1], 8 +(p[6]) xor d[0] = d[0], r21 + ;; +(p[6]) xor d[0] = d[0], s5[6] + nop.f 0 + br.ctop.dptk.few 0b + ;; + mov ar.lc = r30 + mov pr = r29, -1 + br.ret.sptk.few rp +END(xor_ia64_5) +EXPORT_SYMBOL(xor_ia64_5) diff --git a/arch/ia64/mm/Makefile b/arch/ia64/mm/Makefile new file mode 100644 index 000000000000..c03f63c62ac4 --- /dev/null +++ b/arch/ia64/mm/Makefile @@ -0,0 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Makefile for the ia64-specific parts of the memory manager. +# + +obj-y := init.o fault.o tlb.o extable.o ioremap.o + +obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o +obj-$(CONFIG_NUMA) += numa.o +obj-$(CONFIG_SPARSEMEM) += discontig.o +obj-$(CONFIG_FLATMEM) += contig.o diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c new file mode 100644 index 000000000000..1e9eaa107eb7 --- /dev/null +++ b/arch/ia64/mm/contig.c @@ -0,0 +1,208 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Stephane Eranian + * Copyright (C) 2000, Rohit Seth + * Copyright (C) 1999 VA Linux Systems + * Copyright (C) 1999 Walt Drummond + * Copyright (C) 2003 Silicon Graphics, Inc. All rights reserved. + * + * Routines used by ia64 machines with contiguous (or virtually contiguous) + * memory. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/* physical address where the bootmem map is located */ +unsigned long bootmap_start; + +#ifdef CONFIG_SMP +static void *cpu_data; +/** + * per_cpu_init - setup per-cpu variables + * + * Allocate and setup per-cpu data areas. + */ +void *per_cpu_init(void) +{ + static bool first_time = true; + void *cpu0_data = __cpu0_per_cpu; + unsigned int cpu; + + if (!first_time) + goto skip; + first_time = false; + + /* + * get_free_pages() cannot be used before cpu_init() done. + * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs + * to avoid that AP calls get_zeroed_page(). + */ + for_each_possible_cpu(cpu) { + void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start; + + memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start; + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + + /* + * percpu area for cpu0 is moved from the __init area + * which is setup by head.S and used till this point. + * Update ar.k3. This move is ensures that percpu + * area for cpu0 is on the correct node and its + * virtual address isn't insanely far from other + * percpu areas which is important for congruent + * percpu allocator. + */ + if (cpu == 0) + ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) - + (unsigned long)__per_cpu_start); + + cpu_data += PERCPU_PAGE_SIZE; + } +skip: + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; +} + +static inline __init void +alloc_per_cpu_data(void) +{ + size_t size = PERCPU_PAGE_SIZE * num_possible_cpus(); + + cpu_data = memblock_alloc_from(size, PERCPU_PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + if (!cpu_data) + panic("%s: Failed to allocate %lu bytes align=%lx from=%lx\n", + __func__, size, PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS)); +} + +/** + * setup_per_cpu_areas - setup percpu areas + * + * Arch code has already allocated and initialized percpu areas. All + * this function has to do is to teach the determined layout to the + * dynamic percpu allocator, which happens to be more complex than + * creating whole new ones using helpers. + */ +void __init +setup_per_cpu_areas(void) +{ + struct pcpu_alloc_info *ai; + struct pcpu_group_info *gi; + unsigned int cpu; + ssize_t static_size, reserved_size, dyn_size; + + ai = pcpu_alloc_alloc_info(1, num_possible_cpus()); + if (!ai) + panic("failed to allocate pcpu_alloc_info"); + gi = &ai->groups[0]; + + /* units are assigned consecutively to possible cpus */ + for_each_possible_cpu(cpu) + gi->cpu_map[gi->nr_units++] = cpu; + + /* set parameters */ + static_size = __per_cpu_end - __per_cpu_start; + reserved_size = PERCPU_MODULE_RESERVE; + dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; + if (dyn_size < 0) + panic("percpu area overflow static=%zd reserved=%zd\n", + static_size, reserved_size); + + ai->static_size = static_size; + ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = PERCPU_PAGE_SIZE; + ai->atom_size = PAGE_SIZE; + ai->alloc_size = PERCPU_PAGE_SIZE; + + pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]); + pcpu_free_alloc_info(ai); +} +#else +#define alloc_per_cpu_data() do { } while (0) +#endif /* CONFIG_SMP */ + +/** + * find_memory - setup memory map + * + * Walk the EFI memory map and find usable memory for the system, taking + * into account reserved areas. + */ +void __init +find_memory (void) +{ + reserve_memory(); + + /* first find highest page frame number */ + min_low_pfn = ~0UL; + max_low_pfn = 0; + efi_memmap_walk(find_max_min_low_pfn, NULL); + max_pfn = max_low_pfn; + + memblock_add_node(0, PFN_PHYS(max_low_pfn), 0, MEMBLOCK_NONE); + + find_initrd(); + + alloc_per_cpu_data(); +} + +static int __init find_largest_hole(u64 start, u64 end, void *arg) +{ + u64 *max_gap = arg; + + static u64 last_end = PAGE_OFFSET; + + /* NOTE: this algorithm assumes efi memmap table is ordered */ + + if (*max_gap < (start - last_end)) + *max_gap = start - last_end; + last_end = end; + return 0; +} + +static void __init verify_gap_absence(void) +{ + unsigned long max_gap; + + /* Forbid FLATMEM if hole is > than 1G */ + efi_memmap_walk(find_largest_hole, (u64 *)&max_gap); + if (max_gap >= SZ_1G) + panic("Cannot use FLATMEM with %ldMB hole\n" + "Please switch over to SPARSEMEM\n", + (max_gap >> 20)); +} + +/* + * Set up the page tables. + */ + +void __init +paging_init (void) +{ + unsigned long max_dma; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + max_zone_pfns[ZONE_DMA32] = max_dma; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + + verify_gap_absence(); + + free_area_init(max_zone_pfns); + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c new file mode 100644 index 000000000000..73d0db36edb6 --- /dev/null +++ b/arch/ia64/mm/discontig.c @@ -0,0 +1,635 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2000, 2003 Silicon Graphics, Inc. All rights reserved. + * Copyright (c) 2001 Intel Corp. + * Copyright (c) 2001 Tony Luck + * Copyright (c) 2002 NEC Corp. + * Copyright (c) 2002 Kimio Suganuma + * Copyright (c) 2004 Silicon Graphics, Inc + * Russ Anderson + * Jesse Barnes + * Jack Steiner + */ + +/* + * Platform initialization for Discontig Memory + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Track per-node information needed to setup the boot memory allocator, the + * per-node areas, and the real VM. + */ +struct early_node_data { + struct ia64_node_data *node_data; + unsigned long pernode_addr; + unsigned long pernode_size; + unsigned long min_pfn; + unsigned long max_pfn; +}; + +static struct early_node_data mem_data[MAX_NUMNODES] __initdata; +static nodemask_t memory_less_mask __initdata; + +pg_data_t *pgdat_list[MAX_NUMNODES]; + +/* + * To prevent cache aliasing effects, align per-node structures so that they + * start at addresses that are strided by node number. + */ +#define MAX_NODE_ALIGN_OFFSET (32 * 1024 * 1024) +#define NODEDATA_ALIGN(addr, node) \ + ((((addr) + 1024*1024-1) & ~(1024*1024-1)) + \ + (((node)*PERCPU_PAGE_SIZE) & (MAX_NODE_ALIGN_OFFSET - 1))) + +/** + * build_node_maps - callback to setup mem_data structs for each node + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * Detect extents of each piece of memory that we wish to + * treat as a virtually contiguous block (i.e. each node). Each such block + * must start on an %IA64_GRANULE_SIZE boundary, so we round the address down + * if necessary. Any non-existent pages will simply be part of the virtual + * memmap. + */ +static int __init build_node_maps(unsigned long start, unsigned long len, + int node) +{ + unsigned long spfn, epfn, end = start + len; + + epfn = GRANULEROUNDUP(end) >> PAGE_SHIFT; + spfn = GRANULEROUNDDOWN(start) >> PAGE_SHIFT; + + if (!mem_data[node].min_pfn) { + mem_data[node].min_pfn = spfn; + mem_data[node].max_pfn = epfn; + } else { + mem_data[node].min_pfn = min(spfn, mem_data[node].min_pfn); + mem_data[node].max_pfn = max(epfn, mem_data[node].max_pfn); + } + + return 0; +} + +/** + * early_nr_cpus_node - return number of cpus on a given node + * @node: node to check + * + * Count the number of cpus on @node. We can't use nr_cpus_node() yet because + * acpi_boot_init() (which builds the node_to_cpu_mask array) hasn't been + * called yet. Note that node 0 will also count all non-existent cpus. + */ +static int early_nr_cpus_node(int node) +{ + int cpu, n = 0; + + for_each_possible_early_cpu(cpu) + if (node == node_cpuid[cpu].nid) + n++; + + return n; +} + +/** + * compute_pernodesize - compute size of pernode data + * @node: the node id. + */ +static unsigned long compute_pernodesize(int node) +{ + unsigned long pernodesize = 0, cpus; + + cpus = early_nr_cpus_node(node); + pernodesize += PERCPU_PAGE_SIZE * cpus; + pernodesize += node * L1_CACHE_BYTES; + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernodesize += L1_CACHE_ALIGN(sizeof(pg_data_t)); + pernodesize = PAGE_ALIGN(pernodesize); + return pernodesize; +} + +/** + * per_cpu_node_setup - setup per-cpu areas on each node + * @cpu_data: per-cpu area on this node + * @node: node to setup + * + * Copy the static per-cpu data into the region we just set aside and then + * setup __per_cpu_offset for each CPU on this node. Return a pointer to + * the end of the area. + */ +static void *per_cpu_node_setup(void *cpu_data, int node) +{ +#ifdef CONFIG_SMP + int cpu; + + for_each_possible_early_cpu(cpu) { + void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start; + + if (node != node_cpuid[cpu].nid) + continue; + + memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start); + __per_cpu_offset[cpu] = (char *)__va(cpu_data) - + __per_cpu_start; + + /* + * percpu area for cpu0 is moved from the __init area + * which is setup by head.S and used till this point. + * Update ar.k3. This move is ensures that percpu + * area for cpu0 is on the correct node and its + * virtual address isn't insanely far from other + * percpu areas which is important for congruent + * percpu allocator. + */ + if (cpu == 0) + ia64_set_kr(IA64_KR_PER_CPU_DATA, + (unsigned long)cpu_data - + (unsigned long)__per_cpu_start); + + cpu_data += PERCPU_PAGE_SIZE; + } +#endif + return cpu_data; +} + +#ifdef CONFIG_SMP +/** + * setup_per_cpu_areas - setup percpu areas + * + * Arch code has already allocated and initialized percpu areas. All + * this function has to do is to teach the determined layout to the + * dynamic percpu allocator, which happens to be more complex than + * creating whole new ones using helpers. + */ +void __init setup_per_cpu_areas(void) +{ + struct pcpu_alloc_info *ai; + struct pcpu_group_info *gi; + unsigned int *cpu_map; + void *base; + unsigned long base_offset; + unsigned int cpu; + ssize_t static_size, reserved_size, dyn_size; + int node, prev_node, unit, nr_units; + + ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids); + if (!ai) + panic("failed to allocate pcpu_alloc_info"); + cpu_map = ai->groups[0].cpu_map; + + /* determine base */ + base = (void *)ULONG_MAX; + for_each_possible_cpu(cpu) + base = min(base, + (void *)(__per_cpu_offset[cpu] + __per_cpu_start)); + base_offset = (void *)__per_cpu_start - base; + + /* build cpu_map, units are grouped by node */ + unit = 0; + for_each_node(node) + for_each_possible_cpu(cpu) + if (node == node_cpuid[cpu].nid) + cpu_map[unit++] = cpu; + nr_units = unit; + + /* set basic parameters */ + static_size = __per_cpu_end - __per_cpu_start; + reserved_size = PERCPU_MODULE_RESERVE; + dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size; + if (dyn_size < 0) + panic("percpu area overflow static=%zd reserved=%zd\n", + static_size, reserved_size); + + ai->static_size = static_size; + ai->reserved_size = reserved_size; + ai->dyn_size = dyn_size; + ai->unit_size = PERCPU_PAGE_SIZE; + ai->atom_size = PAGE_SIZE; + ai->alloc_size = PERCPU_PAGE_SIZE; + + /* + * CPUs are put into groups according to node. Walk cpu_map + * and create new groups at node boundaries. + */ + prev_node = NUMA_NO_NODE; + ai->nr_groups = 0; + for (unit = 0; unit < nr_units; unit++) { + cpu = cpu_map[unit]; + node = node_cpuid[cpu].nid; + + if (node == prev_node) { + gi->nr_units++; + continue; + } + prev_node = node; + + gi = &ai->groups[ai->nr_groups++]; + gi->nr_units = 1; + gi->base_offset = __per_cpu_offset[cpu] + base_offset; + gi->cpu_map = &cpu_map[unit]; + } + + pcpu_setup_first_chunk(ai, base); + pcpu_free_alloc_info(ai); +} +#endif + +/** + * fill_pernode - initialize pernode data. + * @node: the node id. + * @pernode: physical address of pernode data + * @pernodesize: size of the pernode data + */ +static void __init fill_pernode(int node, unsigned long pernode, + unsigned long pernodesize) +{ + void *cpu_data; + int cpus = early_nr_cpus_node(node); + + mem_data[node].pernode_addr = pernode; + mem_data[node].pernode_size = pernodesize; + memset(__va(pernode), 0, pernodesize); + + cpu_data = (void *)pernode; + pernode += PERCPU_PAGE_SIZE * cpus; + pernode += node * L1_CACHE_BYTES; + + pgdat_list[node] = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + mem_data[node].node_data = __va(pernode); + pernode += L1_CACHE_ALIGN(sizeof(struct ia64_node_data)); + pernode += L1_CACHE_ALIGN(sizeof(pg_data_t)); + + cpu_data = per_cpu_node_setup(cpu_data, node); + + return; +} + +/** + * find_pernode_space - allocate memory for memory map and per-node structures + * @start: physical start of range + * @len: length of range + * @node: node where this range resides + * + * This routine reserves space for the per-cpu data struct, the list of + * pg_data_ts and the per-node data struct. Each node will have something like + * the following in the first chunk of addr. space large enough to hold it. + * + * ________________________ + * | | + * |~~~~~~~~~~~~~~~~~~~~~~~~| <-- NODEDATA_ALIGN(start, node) for the first + * | PERCPU_PAGE_SIZE * | start and length big enough + * | cpus_on_this_node | Node 0 will also have entries for all non-existent cpus. + * |------------------------| + * | local pg_data_t * | + * |------------------------| + * | local ia64_node_data | + * |------------------------| + * | ??? | + * |________________________| + * + * Once this space has been set aside, the bootmem maps are initialized. We + * could probably move the allocation of the per-cpu and ia64_node_data space + * outside of this function and use alloc_bootmem_node(), but doing it here + * is straightforward and we get the alignments we want so... + */ +static int __init find_pernode_space(unsigned long start, unsigned long len, + int node) +{ + unsigned long spfn, epfn; + unsigned long pernodesize = 0, pernode; + + spfn = start >> PAGE_SHIFT; + epfn = (start + len) >> PAGE_SHIFT; + + /* + * Make sure this memory falls within this node's usable memory + * since we may have thrown some away in build_maps(). + */ + if (spfn < mem_data[node].min_pfn || epfn > mem_data[node].max_pfn) + return 0; + + /* Don't setup this node's local space twice... */ + if (mem_data[node].pernode_addr) + return 0; + + /* + * Calculate total size needed, incl. what's necessary + * for good alignment and alias prevention. + */ + pernodesize = compute_pernodesize(node); + pernode = NODEDATA_ALIGN(start, node); + + /* Is this range big enough for what we want to store here? */ + if (start + len > (pernode + pernodesize)) + fill_pernode(node, pernode, pernodesize); + + return 0; +} + +/** + * reserve_pernode_space - reserve memory for per-node space + * + * Reserve the space used by the bootmem maps & per-node space in the boot + * allocator so that when we actually create the real mem maps we don't + * use their memory. + */ +static void __init reserve_pernode_space(void) +{ + unsigned long base, size; + int node; + + for_each_online_node(node) { + if (node_isset(node, memory_less_mask)) + continue; + + /* Now the per-node space */ + size = mem_data[node].pernode_size; + base = __pa(mem_data[node].pernode_addr); + memblock_reserve(base, size); + } +} + +static void scatter_node_data(void) +{ + pg_data_t **dst; + int node; + + /* + * for_each_online_node() can't be used at here. + * node_online_map is not set for hot-added nodes at this time, + * because we are halfway through initialization of the new node's + * structures. If for_each_online_node() is used, a new node's + * pg_data_ptrs will be not initialized. Instead of using it, + * pgdat_list[] is checked. + */ + for_each_node(node) { + if (pgdat_list[node]) { + dst = LOCAL_DATA_ADDR(pgdat_list[node])->pg_data_ptrs; + memcpy(dst, pgdat_list, sizeof(pgdat_list)); + } + } +} + +/** + * initialize_pernode_data - fixup per-cpu & per-node pointers + * + * Each node's per-node area has a copy of the global pg_data_t list, so + * we copy that to each node here, as well as setting the per-cpu pointer + * to the local node data structure. + */ +static void __init initialize_pernode_data(void) +{ + int cpu, node; + + scatter_node_data(); + +#ifdef CONFIG_SMP + /* Set the node_data pointer for each per-cpu struct */ + for_each_possible_early_cpu(cpu) { + node = node_cpuid[cpu].nid; + per_cpu(ia64_cpu_info, cpu).node_data = + mem_data[node].node_data; + } +#else + { + struct cpuinfo_ia64 *cpu0_cpu_info; + cpu = 0; + node = node_cpuid[cpu].nid; + cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start + + ((char *)&ia64_cpu_info - __per_cpu_start)); + cpu0_cpu_info->node_data = mem_data[node].node_data; + } +#endif /* CONFIG_SMP */ +} + +/** + * memory_less_node_alloc - * attempt to allocate memory on the best NUMA slit + * node but fall back to any other node when __alloc_bootmem_node fails + * for best. + * @nid: node id + * @pernodesize: size of this node's pernode data + */ +static void __init *memory_less_node_alloc(int nid, unsigned long pernodesize) +{ + void *ptr = NULL; + u8 best = 0xff; + int bestnode = NUMA_NO_NODE, node, anynode = 0; + + for_each_online_node(node) { + if (node_isset(node, memory_less_mask)) + continue; + else if (node_distance(nid, node) < best) { + best = node_distance(nid, node); + bestnode = node; + } + anynode = node; + } + + if (bestnode == NUMA_NO_NODE) + bestnode = anynode; + + ptr = memblock_alloc_try_nid(pernodesize, PERCPU_PAGE_SIZE, + __pa(MAX_DMA_ADDRESS), + MEMBLOCK_ALLOC_ACCESSIBLE, + bestnode); + if (!ptr) + panic("%s: Failed to allocate %lu bytes align=0x%lx nid=%d from=%lx\n", + __func__, pernodesize, PERCPU_PAGE_SIZE, bestnode, + __pa(MAX_DMA_ADDRESS)); + + return ptr; +} + +/** + * memory_less_nodes - allocate and initialize CPU only nodes pernode + * information. + */ +static void __init memory_less_nodes(void) +{ + unsigned long pernodesize; + void *pernode; + int node; + + for_each_node_mask(node, memory_less_mask) { + pernodesize = compute_pernodesize(node); + pernode = memory_less_node_alloc(node, pernodesize); + fill_pernode(node, __pa(pernode), pernodesize); + } + + return; +} + +/** + * find_memory - walk the EFI memory map and setup the bootmem allocator + * + * Called early in boot to setup the bootmem allocator, and to + * allocate the per-cpu and per-node structures. + */ +void __init find_memory(void) +{ + int node; + + reserve_memory(); + efi_memmap_walk(filter_memory, register_active_ranges); + + if (num_online_nodes() == 0) { + printk(KERN_ERR "node info missing!\n"); + node_set_online(0); + } + + nodes_or(memory_less_mask, memory_less_mask, node_online_map); + min_low_pfn = -1; + max_low_pfn = 0; + + /* These actually end up getting called by call_pernode_memory() */ + efi_memmap_walk(filter_rsvd_memory, build_node_maps); + efi_memmap_walk(filter_rsvd_memory, find_pernode_space); + efi_memmap_walk(find_max_min_low_pfn, NULL); + + for_each_online_node(node) + if (mem_data[node].min_pfn) + node_clear(node, memory_less_mask); + + reserve_pernode_space(); + memory_less_nodes(); + initialize_pernode_data(); + + max_pfn = max_low_pfn; + + find_initrd(); +} + +#ifdef CONFIG_SMP +/** + * per_cpu_init - setup per-cpu variables + * + * find_pernode_space() does most of this already, we just need to set + * local_per_cpu_offset + */ +void *per_cpu_init(void) +{ + int cpu; + static int first_time = 1; + + if (first_time) { + first_time = 0; + for_each_possible_early_cpu(cpu) + per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu]; + } + + return __per_cpu_start + __per_cpu_offset[smp_processor_id()]; +} +#endif /* CONFIG_SMP */ + +/** + * call_pernode_memory - use SRAT to call callback functions with node info + * @start: physical start of range + * @len: length of range + * @arg: function to call for each range + * + * efi_memmap_walk() knows nothing about layout of memory across nodes. Find + * out to which node a block of memory belongs. Ignore memory that we cannot + * identify, and split blocks that run across multiple nodes. + * + * Take this opportunity to round the start address up and the end address + * down to page boundaries. + */ +void call_pernode_memory(unsigned long start, unsigned long len, void *arg) +{ + unsigned long rs, re, end = start + len; + void (*func)(unsigned long, unsigned long, int); + int i; + + start = PAGE_ALIGN(start); + end &= PAGE_MASK; + if (start >= end) + return; + + func = arg; + + if (!num_node_memblks) { + /* No SRAT table, so assume one node (node 0) */ + if (start < end) + (*func)(start, end - start, 0); + return; + } + + for (i = 0; i < num_node_memblks; i++) { + rs = max(start, node_memblk[i].start_paddr); + re = min(end, node_memblk[i].start_paddr + + node_memblk[i].size); + + if (rs < re) + (*func)(rs, re - rs, node_memblk[i].nid); + + if (re == end) + break; + } +} + +/** + * paging_init - setup page tables + * + * paging_init() sets up the page tables for each node of the system and frees + * the bootmem allocator memory for general use. + */ +void __init paging_init(void) +{ + unsigned long max_dma; + unsigned long max_zone_pfns[MAX_NR_ZONES]; + + max_dma = virt_to_phys((void *) MAX_DMA_ADDRESS) >> PAGE_SHIFT; + + sparse_init(); + + memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); + max_zone_pfns[ZONE_DMA32] = max_dma; + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; + free_area_init(max_zone_pfns); + + zero_page_memmap_ptr = virt_to_page(ia64_imva(empty_zero_page)); +} + +pg_data_t * __init arch_alloc_nodedata(int nid) +{ + unsigned long size = compute_pernodesize(nid); + + return memblock_alloc(size, SMP_CACHE_BYTES); +} + +void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) +{ + pgdat_list[update_node] = update_pgdat; + scatter_node_data(); +} + +#ifdef CONFIG_SPARSEMEM_VMEMMAP +int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) +{ + return vmemmap_populate_basepages(start, end, node, NULL); +} + +void vmemmap_free(unsigned long start, unsigned long end, + struct vmem_altmap *altmap) +{ +} +#endif diff --git a/arch/ia64/mm/extable.c b/arch/ia64/mm/extable.c new file mode 100644 index 000000000000..da477c11770b --- /dev/null +++ b/arch/ia64/mm/extable.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Kernel exception handling table support. Derived from arch/alpha/mm/extable.c. + * + * Copyright (C) 1998, 1999, 2001-2002, 2004 Hewlett-Packard Co + * David Mosberger-Tang + */ + +#include +#include +#include +#include + +void +ia64_handle_exception (struct pt_regs *regs, const struct exception_table_entry *e) +{ + long fix = (u64) &e->fixup + e->fixup; + + regs->r8 = -EFAULT; + if (fix & 4) + regs->r9 = 0; + regs->cr_iip = fix & ~0xf; + ia64_psr(regs)->ri = fix & 0x3; /* set continuation slot number */ +} diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c new file mode 100644 index 000000000000..5458b52b4009 --- /dev/null +++ b/arch/ia64/mm/fault.c @@ -0,0 +1,251 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MMU fault handling support. + * + * Copyright (C) 1998-2002 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +extern int die(char *, struct pt_regs *, long); + +/* + * Return TRUE if ADDRESS points at a page in the kernel's mapped segment + * (inside region 5, on ia64) and that page is present. + */ +static int +mapped_kernel_page_is_present (unsigned long address) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + + pgd = pgd_offset_k(address); + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return 0; + + p4d = p4d_offset(pgd, address); + if (p4d_none(*p4d) || p4d_bad(*p4d)) + return 0; + + pud = pud_offset(p4d, address); + if (pud_none(*pud) || pud_bad(*pud)) + return 0; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return 0; + + ptep = pte_offset_kernel(pmd, address); + if (!ptep) + return 0; + + pte = *ptep; + return pte_present(pte); +} + +# define VM_READ_BIT 0 +# define VM_WRITE_BIT 1 +# define VM_EXEC_BIT 2 + +void __kprobes +ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs) +{ + int signal = SIGSEGV, code = SEGV_MAPERR; + struct vm_area_struct *vma, *prev_vma; + struct mm_struct *mm = current->mm; + unsigned long mask; + vm_fault_t fault; + unsigned int flags = FAULT_FLAG_DEFAULT; + + mask = ((((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT) + | (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)); + + /* mmap_lock is performance critical.... */ + prefetchw(&mm->mmap_lock); + + /* + * If we're in an interrupt or have no user context, we must not take the fault.. + */ + if (faulthandler_disabled() || !mm) + goto no_context; + + /* + * This is to handle the kprobes on user space access instructions + */ + if (kprobe_page_fault(regs, TRAP_BRKPT)) + return; + + if (user_mode(regs)) + flags |= FAULT_FLAG_USER; + if (mask & VM_WRITE) + flags |= FAULT_FLAG_WRITE; + + perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address); +retry: + mmap_read_lock(mm); + + vma = find_vma_prev(mm, address, &prev_vma); + if (!vma && !prev_vma ) + goto bad_area; + + /* + * find_vma_prev() returns vma such that address < vma->vm_end or NULL + * + * May find no vma, but could be that the last vm area is the + * register backing store that needs to expand upwards, in + * this case vma will be null, but prev_vma will ne non-null + */ + if (( !vma && prev_vma ) || (address < vma->vm_start) ) { + vma = expand_stack(mm, address); + if (!vma) + goto bad_area_nosemaphore; + } + + code = SEGV_ACCERR; + + /* OK, we've got a good vm_area for this memory area. Check the access permissions: */ + +# if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \ + || (1 << VM_EXEC_BIT) != VM_EXEC) +# error File is out of sync with . Please update. +# endif + + if (((isr >> IA64_ISR_R_BIT) & 1UL) && (!(vma->vm_flags & (VM_READ | VM_WRITE)))) + goto bad_area; + + if ((vma->vm_flags & mask) != mask) + goto bad_area; + + /* + * If for any reason at all we couldn't handle the fault, make + * sure we exit gracefully rather than endlessly redo the + * fault. + */ + fault = handle_mm_fault(vma, address, flags, regs); + + if (fault_signal_pending(fault, regs)) { + if (!user_mode(regs)) + goto no_context; + return; + } + + /* The fault is fully completed (including releasing mmap lock) */ + if (fault & VM_FAULT_COMPLETED) + return; + + if (unlikely(fault & VM_FAULT_ERROR)) { + /* + * We ran out of memory, or some other thing happened + * to us that made us unable to handle the page fault + * gracefully. + */ + if (fault & VM_FAULT_OOM) { + goto out_of_memory; + } else if (fault & VM_FAULT_SIGSEGV) { + goto bad_area; + } else if (fault & VM_FAULT_SIGBUS) { + signal = SIGBUS; + goto bad_area; + } + BUG(); + } + + if (fault & VM_FAULT_RETRY) { + flags |= FAULT_FLAG_TRIED; + + /* No need to mmap_read_unlock(mm) as we would + * have already released it in __lock_page_or_retry + * in mm/filemap.c. + */ + + goto retry; + } + + mmap_read_unlock(mm); + return; + + bad_area: + mmap_read_unlock(mm); + bad_area_nosemaphore: + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + if (user_mode(regs)) { + force_sig_fault(signal, code, (void __user *) address, + 0, __ISR_VALID, isr); + return; + } + + no_context: + if ((isr & IA64_ISR_SP) + || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH)) + { + /* + * This fault was due to a speculative load or lfetch.fault, set the "ed" + * bit in the psr to ensure forward progress. (Target register will get a + * NaT for ld.s, lfetch will be canceled.) + */ + ia64_psr(regs)->ed = 1; + return; + } + + /* + * Since we have no vma's for region 5, we might get here even if the address is + * valid, due to the VHPT walker inserting a non present translation that becomes + * stale. If that happens, the non present fault handler already purged the stale + * translation, which fixed the problem. So, we check to see if the translation is + * valid, and return if it is. + */ + if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address)) + return; + + if (ia64_done_with_exception(regs)) + return; + + /* + * Oops. The kernel tried to access some bad page. We'll have to terminate things + * with extreme prejudice. + */ + bust_spinlocks(1); + + if (address < PAGE_SIZE) + printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address); + else + printk(KERN_ALERT "Unable to handle kernel paging request at " + "virtual address %016lx\n", address); + if (die("Oops", regs, isr)) + regs = NULL; + bust_spinlocks(0); + if (regs) + make_task_dead(SIGKILL); + return; + + out_of_memory: + mmap_read_unlock(mm); + if (!user_mode(regs)) + goto no_context; + pagefault_out_of_memory(); +} diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c new file mode 100644 index 000000000000..38de155d80c5 --- /dev/null +++ b/arch/ia64/mm/hugetlbpage.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IA-64 Huge TLB Page Support for Kernel. + * + * Copyright (C) 2002-2004 Rohit Seth + * Copyright (C) 2003-2004 Ken Chen + * + * Sep, 2003: add numa support + * Feb, 2004: dynamic hugetlb page size via boot parameter + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +unsigned int hpage_shift = HPAGE_SHIFT_DEFAULT; +EXPORT_SYMBOL(hpage_shift); + +pte_t * +huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long addr, unsigned long sz) +{ + unsigned long taddr = htlbpage_to_page(addr); + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, taddr); + p4d = p4d_offset(pgd, taddr); + pud = pud_alloc(mm, p4d, taddr); + if (pud) { + pmd = pmd_alloc(mm, pud, taddr); + if (pmd) + pte = pte_alloc_huge(mm, pmd, taddr); + } + return pte; +} + +pte_t * +huge_pte_offset (struct mm_struct *mm, unsigned long addr, unsigned long sz) +{ + unsigned long taddr = htlbpage_to_page(addr); + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte = NULL; + + pgd = pgd_offset(mm, taddr); + if (pgd_present(*pgd)) { + p4d = p4d_offset(pgd, taddr); + if (p4d_present(*p4d)) { + pud = pud_offset(p4d, taddr); + if (pud_present(*pud)) { + pmd = pmd_offset(pud, taddr); + if (pmd_present(*pmd)) + pte = pte_offset_huge(pmd, taddr); + } + } + } + + return pte; +} + +#define mk_pte_huge(entry) { pte_val(entry) |= _PAGE_P; } + +/* + * Don't actually need to do any preparation, but need to make sure + * the address is in the right region. + */ +int prepare_hugepage_range(struct file *file, + unsigned long addr, unsigned long len) +{ + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) + return -EINVAL; + if (REGION_NUMBER(addr) != RGN_HPAGE) + return -EINVAL; + + return 0; +} + +int pmd_huge(pmd_t pmd) +{ + return 0; +} + +int pud_huge(pud_t pud) +{ + return 0; +} + +void hugetlb_free_pgd_range(struct mmu_gather *tlb, + unsigned long addr, unsigned long end, + unsigned long floor, unsigned long ceiling) +{ + /* + * This is called to free hugetlb page tables. + * + * The offset of these addresses from the base of the hugetlb + * region must be scaled down by HPAGE_SIZE/PAGE_SIZE so that + * the standard free_pgd_range will free the right page tables. + * + * If floor and ceiling are also in the hugetlb region, they + * must likewise be scaled down; but if outside, left unchanged. + */ + + addr = htlbpage_to_page(addr); + end = htlbpage_to_page(end); + if (REGION_NUMBER(floor) == RGN_HPAGE) + floor = htlbpage_to_page(floor); + if (REGION_NUMBER(ceiling) == RGN_HPAGE) + ceiling = htlbpage_to_page(ceiling); + + free_pgd_range(tlb, addr, end, floor, ceiling); +} + +unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, + unsigned long pgoff, unsigned long flags) +{ + struct vm_unmapped_area_info info; + + if (len > RGN_MAP_LIMIT) + return -ENOMEM; + if (len & ~HPAGE_MASK) + return -EINVAL; + + /* Handle MAP_FIXED */ + if (flags & MAP_FIXED) { + if (prepare_hugepage_range(file, addr, len)) + return -EINVAL; + return addr; + } + + /* This code assumes that RGN_HPAGE != 0. */ + if ((REGION_NUMBER(addr) != RGN_HPAGE) || (addr & (HPAGE_SIZE - 1))) + addr = HPAGE_REGION_BASE; + + info.flags = 0; + info.length = len; + info.low_limit = addr; + info.high_limit = HPAGE_REGION_BASE + RGN_MAP_LIMIT; + info.align_mask = PAGE_MASK & (HPAGE_SIZE - 1); + info.align_offset = 0; + return vm_unmapped_area(&info); +} + +static int __init hugetlb_setup_sz(char *str) +{ + u64 tr_pages; + unsigned long long size; + + if (ia64_pal_vm_page_size(&tr_pages, NULL) != 0) + /* + * shouldn't happen, but just in case. + */ + tr_pages = 0x15557000UL; + + size = memparse(str, &str); + if (*str || !is_power_of_2(size) || !(tr_pages & size) || + size <= PAGE_SIZE || + size > (1UL << PAGE_SHIFT << MAX_PAGE_ORDER)) { + printk(KERN_WARNING "Invalid huge page size specified\n"); + return 1; + } + + hpage_shift = __ffs(size); + /* + * boot cpu already executed ia64_mmu_init, and has HPAGE_SHIFT_DEFAULT + * override here with new page shift. + */ + ia64_set_rr(HPAGE_REGION_BASE, hpage_shift << 2); + return 0; +} +early_param("hugepagesz", hugetlb_setup_sz); diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c new file mode 100644 index 000000000000..05b0f2f0c073 --- /dev/null +++ b/arch/ia64/mm/init.c @@ -0,0 +1,532 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Initialize MMU support. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + */ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +extern void ia64_tlb_init (void); + +unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL; + +struct page *zero_page_memmap_ptr; /* map entry for zero page */ +EXPORT_SYMBOL(zero_page_memmap_ptr); + +void +__ia64_sync_icache_dcache (pte_t pte) +{ + unsigned long addr; + struct folio *folio; + + folio = page_folio(pte_page(pte)); + addr = (unsigned long)folio_address(folio); + + if (test_bit(PG_arch_1, &folio->flags)) + return; /* i-cache is already coherent with d-cache */ + + flush_icache_range(addr, addr + folio_size(folio)); + set_bit(PG_arch_1, &folio->flags); /* mark page as clean */ +} + +/* + * Since DMA is i-cache coherent, any (complete) folios that were written via + * DMA can be marked as "clean" so that lazy_mmu_prot_update() doesn't have to + * flush them when they get mapped into an executable vm-area. + */ +void arch_dma_mark_clean(phys_addr_t paddr, size_t size) +{ + unsigned long pfn = PHYS_PFN(paddr); + struct folio *folio = page_folio(pfn_to_page(pfn)); + ssize_t left = size; + size_t offset = offset_in_folio(folio, paddr); + + if (offset) { + left -= folio_size(folio) - offset; + if (left <= 0) + return; + folio = folio_next(folio); + } + + while (left >= (ssize_t)folio_size(folio)) { + left -= folio_size(folio); + set_bit(PG_arch_1, &pfn_to_page(pfn)->flags); + if (!left) + break; + folio = folio_next(folio); + } +} + +inline void +ia64_set_rbs_bot (void) +{ + unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16; + + if (stack_size > MAX_USER_STACK_SIZE) + stack_size = MAX_USER_STACK_SIZE; + current->thread.rbs_bot = PAGE_ALIGN(current->mm->start_stack - stack_size); +} + +/* + * This performs some platform-dependent address space initialization. + * On IA-64, we want to setup the VM area for the register backing + * store (which grows upwards) and install the gateway page which is + * used for signal trampolines, etc. + */ +void +ia64_init_addr_space (void) +{ + struct vm_area_struct *vma; + + ia64_set_rbs_bot(); + + /* + * If we're out of memory and kmem_cache_alloc() returns NULL, we simply ignore + * the problem. When the process attempts to write to the register backing store + * for the first time, it will get a SEGFAULT in this case. + */ + vma = vm_area_alloc(current->mm); + if (vma) { + vma_set_anonymous(vma); + vma->vm_start = current->thread.rbs_bot & PAGE_MASK; + vma->vm_end = vma->vm_start + PAGE_SIZE; + vm_flags_init(vma, VM_DATA_DEFAULT_FLAGS|VM_GROWSUP|VM_ACCOUNT); + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); + mmap_write_lock(current->mm); + if (insert_vm_struct(current->mm, vma)) { + mmap_write_unlock(current->mm); + vm_area_free(vma); + return; + } + mmap_write_unlock(current->mm); + } + + /* map NaT-page at address zero to speed up speculative dereferencing of NULL: */ + if (!(current->personality & MMAP_PAGE_ZERO)) { + vma = vm_area_alloc(current->mm); + if (vma) { + vma_set_anonymous(vma); + vma->vm_end = PAGE_SIZE; + vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT); + vm_flags_init(vma, VM_READ | VM_MAYREAD | VM_IO | + VM_DONTEXPAND | VM_DONTDUMP); + mmap_write_lock(current->mm); + if (insert_vm_struct(current->mm, vma)) { + mmap_write_unlock(current->mm); + vm_area_free(vma); + return; + } + mmap_write_unlock(current->mm); + } + } +} + +void +free_initmem (void) +{ + free_reserved_area(ia64_imva(__init_begin), ia64_imva(__init_end), + -1, "unused kernel"); +} + +void __init +free_initrd_mem (unsigned long start, unsigned long end) +{ + /* + * EFI uses 4KB pages while the kernel can use 4KB or bigger. + * Thus EFI and the kernel may have different page sizes. It is + * therefore possible to have the initrd share the same page as + * the end of the kernel (given current setup). + * + * To avoid freeing/using the wrong page (kernel sized) we: + * - align up the beginning of initrd + * - align down the end of initrd + * + * | | + * |=============| a000 + * | | + * | | + * | | 9000 + * |/////////////| + * |/////////////| + * |=============| 8000 + * |///INITRD////| + * |/////////////| + * |/////////////| 7000 + * | | + * |KKKKKKKKKKKKK| + * |=============| 6000 + * |KKKKKKKKKKKKK| + * |KKKKKKKKKKKKK| + * K=kernel using 8KB pages + * + * In this example, we must free page 8000 ONLY. So we must align up + * initrd_start and keep initrd_end as is. + */ + start = PAGE_ALIGN(start); + end = end & PAGE_MASK; + + if (start < end) + printk(KERN_INFO "Freeing initrd memory: %ldkB freed\n", (end - start) >> 10); + + for (; start < end; start += PAGE_SIZE) { + if (!virt_addr_valid(start)) + continue; + free_reserved_page(virt_to_page(start)); + } +} + +/* + * This installs a clean page in the kernel's page table. + */ +static struct page * __init +put_kernel_page (struct page *page, unsigned long address, pgprot_t pgprot) +{ + pgd_t *pgd; + p4d_t *p4d; + pud_t *pud; + pmd_t *pmd; + pte_t *pte; + + pgd = pgd_offset_k(address); /* note: this is NOT pgd_offset()! */ + + { + p4d = p4d_alloc(&init_mm, pgd, address); + if (!p4d) + goto out; + pud = pud_alloc(&init_mm, p4d, address); + if (!pud) + goto out; + pmd = pmd_alloc(&init_mm, pud, address); + if (!pmd) + goto out; + pte = pte_alloc_kernel(pmd, address); + if (!pte) + goto out; + if (!pte_none(*pte)) + goto out; + set_pte(pte, mk_pte(page, pgprot)); + } + out: + /* no need for flush_tlb */ + return page; +} + +static void __init +setup_gate (void) +{ + struct page *page; + + /* + * Map the gate page twice: once read-only to export the ELF + * headers etc. and once execute-only page to enable + * privilege-promotion via "epc": + */ + page = virt_to_page(ia64_imva(__start_gate_section)); + put_kernel_page(page, GATE_ADDR, PAGE_READONLY); +#ifdef HAVE_BUGGY_SEGREL + page = virt_to_page(ia64_imva(__start_gate_section + PAGE_SIZE)); + put_kernel_page(page, GATE_ADDR + PAGE_SIZE, PAGE_GATE); +#else + put_kernel_page(page, GATE_ADDR + PERCPU_PAGE_SIZE, PAGE_GATE); + /* Fill in the holes (if any) with read-only zero pages: */ + { + unsigned long addr; + + for (addr = GATE_ADDR + PAGE_SIZE; + addr < GATE_ADDR + PERCPU_PAGE_SIZE; + addr += PAGE_SIZE) + { + put_kernel_page(ZERO_PAGE(0), addr, + PAGE_READONLY); + put_kernel_page(ZERO_PAGE(0), addr + PERCPU_PAGE_SIZE, + PAGE_READONLY); + } + } +#endif + ia64_patch_gate(); +} + +static struct vm_area_struct gate_vma; + +static int __init gate_vma_init(void) +{ + vma_init(&gate_vma, NULL); + gate_vma.vm_start = FIXADDR_USER_START; + gate_vma.vm_end = FIXADDR_USER_END; + vm_flags_init(&gate_vma, VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC); + gate_vma.vm_page_prot = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | _PAGE_AR_RX); + + return 0; +} +__initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return &gate_vma; +} + +int in_gate_area_no_mm(unsigned long addr) +{ + if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) + return 1; + return 0; +} + +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return in_gate_area_no_mm(addr); +} + +void ia64_mmu_init(void *my_cpu_data) +{ + unsigned long pta, impl_va_bits; + extern void tlb_init(void); + +#ifdef CONFIG_DISABLE_VHPT +# define VHPT_ENABLE_BIT 0 +#else +# define VHPT_ENABLE_BIT 1 +#endif + + /* + * Check if the virtually mapped linear page table (VMLPT) overlaps with a mapped + * address space. The IA-64 architecture guarantees that at least 50 bits of + * virtual address space are implemented but if we pick a large enough page size + * (e.g., 64KB), the mapped address space is big enough that it will overlap with + * VMLPT. I assume that once we run on machines big enough to warrant 64KB pages, + * IMPL_VA_MSB will be significantly bigger, so this is unlikely to become a + * problem in practice. Alternatively, we could truncate the top of the mapped + * address space to not permit mappings that would overlap with the VMLPT. + * --davidm 00/12/06 + */ +# define pte_bits 3 +# define mapped_space_bits (3*(PAGE_SHIFT - pte_bits) + PAGE_SHIFT) + /* + * The virtual page table has to cover the entire implemented address space within + * a region even though not all of this space may be mappable. The reason for + * this is that the Access bit and Dirty bit fault handlers perform + * non-speculative accesses to the virtual page table, so the address range of the + * virtual page table itself needs to be covered by virtual page table. + */ +# define vmlpt_bits (impl_va_bits - PAGE_SHIFT + pte_bits) +# define POW2(n) (1ULL << (n)) + + impl_va_bits = ffz(~(local_cpu_data->unimpl_va_mask | (7UL << 61))); + + if (impl_va_bits < 51 || impl_va_bits > 61) + panic("CPU has bogus IMPL_VA_MSB value of %lu!\n", impl_va_bits - 1); + /* + * mapped_space_bits - PAGE_SHIFT is the total number of ptes we need, + * which must fit into "vmlpt_bits - pte_bits" slots. Second half of + * the test makes sure that our mapped space doesn't overlap the + * unimplemented hole in the middle of the region. + */ + if ((mapped_space_bits - PAGE_SHIFT > vmlpt_bits - pte_bits) || + (mapped_space_bits > impl_va_bits - 1)) + panic("Cannot build a big enough virtual-linear page table" + " to cover mapped address space.\n" + " Try using a smaller page size.\n"); + + + /* place the VMLPT at the end of each page-table mapped region: */ + pta = POW2(61) - POW2(vmlpt_bits); + + /* + * Set the (virtually mapped linear) page table address. Bit + * 8 selects between the short and long format, bits 2-7 the + * size of the table, and bit 0 whether the VHPT walker is + * enabled. + */ + ia64_set_pta(pta | (0 << 8) | (vmlpt_bits << 2) | VHPT_ENABLE_BIT); + + ia64_tlb_init(); + +#ifdef CONFIG_HUGETLB_PAGE + ia64_set_rr(HPAGE_REGION_BASE, HPAGE_SHIFT << 2); + ia64_srlz_d(); +#endif +} + +int __init register_active_ranges(u64 start, u64 len, int nid) +{ + u64 end = start + len; + +#ifdef CONFIG_KEXEC + if (start > crashk_res.start && start < crashk_res.end) + start = crashk_res.end; + if (end > crashk_res.start && end < crashk_res.end) + end = crashk_res.start; +#endif + + if (start < end) + memblock_add_node(__pa(start), end - start, nid, MEMBLOCK_NONE); + return 0; +} + +int +find_max_min_low_pfn (u64 start, u64 end, void *arg) +{ + unsigned long pfn_start, pfn_end; +#ifdef CONFIG_FLATMEM + pfn_start = (PAGE_ALIGN(__pa(start))) >> PAGE_SHIFT; + pfn_end = (PAGE_ALIGN(__pa(end - 1))) >> PAGE_SHIFT; +#else + pfn_start = GRANULEROUNDDOWN(__pa(start)) >> PAGE_SHIFT; + pfn_end = GRANULEROUNDUP(__pa(end - 1)) >> PAGE_SHIFT; +#endif + min_low_pfn = min(min_low_pfn, pfn_start); + max_low_pfn = max(max_low_pfn, pfn_end); + return 0; +} + +/* + * Boot command-line option "nolwsys" can be used to disable the use of any light-weight + * system call handler. When this option is in effect, all fsyscalls will end up bubbling + * down into the kernel and calling the normal (heavy-weight) syscall handler. This is + * useful for performance testing, but conceivably could also come in handy for debugging + * purposes. + */ + +static int nolwsys __initdata; + +static int __init +nolwsys_setup (char *s) +{ + nolwsys = 1; + return 1; +} + +__setup("nolwsys", nolwsys_setup); + +void __init +mem_init (void) +{ + int i; + + BUG_ON(PTRS_PER_PGD * sizeof(pgd_t) != PAGE_SIZE); + BUG_ON(PTRS_PER_PMD * sizeof(pmd_t) != PAGE_SIZE); + BUG_ON(PTRS_PER_PTE * sizeof(pte_t) != PAGE_SIZE); + + /* + * This needs to be called _after_ the command line has been parsed but + * _before_ any drivers that may need the PCI DMA interface are + * initialized or bootmem has been freed. + */ + do { +#ifdef CONFIG_INTEL_IOMMU + detect_intel_iommu(); + if (iommu_detected) + break; +#endif + swiotlb_init(true, SWIOTLB_VERBOSE); + } while (0); + +#ifdef CONFIG_FLATMEM + BUG_ON(!mem_map); +#endif + + set_max_mapnr(max_low_pfn); + high_memory = __va(max_low_pfn * PAGE_SIZE); + memblock_free_all(); + + /* + * For fsyscall entrypoints with no light-weight handler, use the ordinary + * (heavy-weight) handler, but mark it by setting bit 0, so the fsyscall entry + * code can tell them apart. + */ + for (i = 0; i < NR_syscalls; ++i) { + extern unsigned long fsyscall_table[NR_syscalls]; + extern unsigned long sys_call_table[NR_syscalls]; + + if (!fsyscall_table[i] || nolwsys) + fsyscall_table[i] = sys_call_table[i] | 1; + } + setup_gate(); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +int arch_add_memory(int nid, u64 start, u64 size, + struct mhp_params *params) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + int ret; + + if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot)) + return -EINVAL; + + ret = __add_pages(nid, start_pfn, nr_pages, params); + if (ret) + printk("%s: Problem encountered in __add_pages() as ret=%d\n", + __func__, ret); + + return ret; +} + +void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long nr_pages = size >> PAGE_SHIFT; + + __remove_pages(start_pfn, nr_pages, altmap); +} +#endif + +static const pgprot_t protection_map[16] = { + [VM_NONE] = PAGE_NONE, + [VM_READ] = PAGE_READONLY, + [VM_WRITE] = PAGE_READONLY, + [VM_WRITE | VM_READ] = PAGE_READONLY, + [VM_EXEC] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | + _PAGE_AR_X_RX), + [VM_EXEC | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | + _PAGE_AR_RX), + [VM_EXEC | VM_WRITE] = PAGE_COPY_EXEC, + [VM_EXEC | VM_WRITE | VM_READ] = PAGE_COPY_EXEC, + [VM_SHARED] = PAGE_NONE, + [VM_SHARED | VM_READ] = PAGE_READONLY, + [VM_SHARED | VM_WRITE] = PAGE_SHARED, + [VM_SHARED | VM_WRITE | VM_READ] = PAGE_SHARED, + [VM_SHARED | VM_EXEC] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | + _PAGE_AR_X_RX), + [VM_SHARED | VM_EXEC | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | + _PAGE_AR_RX), + [VM_SHARED | VM_EXEC | VM_WRITE] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | + _PAGE_AR_RWX), + [VM_SHARED | VM_EXEC | VM_WRITE | VM_READ] = __pgprot(__ACCESS_BITS | _PAGE_PL_3 | + _PAGE_AR_RWX) +}; +DECLARE_VM_GET_PAGE_PROT diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c new file mode 100644 index 000000000000..711b6abc822e --- /dev/null +++ b/arch/ia64/mm/ioremap.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * (c) Copyright 2006, 2007 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static inline void __iomem * +__ioremap_uc(unsigned long phys_addr) +{ + return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr); +} + +void __iomem * +early_ioremap (unsigned long phys_addr, unsigned long size) +{ + u64 attr; + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return (void __iomem *) phys_to_virt(phys_addr); + return __ioremap_uc(phys_addr); +} + +void __iomem *ioremap_prot(phys_addr_t phys_addr, size_t size, + unsigned long flags) +{ + u64 attr; + unsigned long gran_base, gran_size; + unsigned long page_base; + + /* + * For things in kern_memmap, we must use the same attribute + * as the rest of the kernel. For more details, see + * Documentation/arch/ia64/aliasing.rst. + */ + attr = kern_mem_attribute(phys_addr, size); + if (attr & EFI_MEMORY_WB) + return (void __iomem *) phys_to_virt(phys_addr); + else if (attr & EFI_MEMORY_UC) + return __ioremap_uc(phys_addr); + + /* + * Some chipsets don't support UC access to memory. If + * WB is supported for the whole granule, we prefer that. + */ + gran_base = GRANULEROUNDDOWN(phys_addr); + gran_size = GRANULEROUNDUP(phys_addr + size) - gran_base; + if (efi_mem_attribute(gran_base, gran_size) & EFI_MEMORY_WB) + return (void __iomem *) phys_to_virt(phys_addr); + + /* + * WB is not supported for the whole granule, so we can't use + * the region 7 identity mapping. If we can safely cover the + * area with kernel page table mappings, we can use those + * instead. + */ + page_base = phys_addr & PAGE_MASK; + size = PAGE_ALIGN(phys_addr + size) - page_base; + if (efi_mem_attribute(page_base, size) & EFI_MEMORY_WB) + return generic_ioremap_prot(phys_addr, size, __pgprot(flags)); + + return __ioremap_uc(phys_addr); +} +EXPORT_SYMBOL(ioremap_prot); + +void __iomem * +ioremap_uc(unsigned long phys_addr, unsigned long size) +{ + if (kern_mem_attribute(phys_addr, size) & EFI_MEMORY_WB) + return NULL; + + return __ioremap_uc(phys_addr); +} +EXPORT_SYMBOL(ioremap_uc); + +void +early_iounmap (volatile void __iomem *addr, unsigned long size) +{ +} + +void iounmap(volatile void __iomem *addr) +{ + if (REGION_NUMBER(addr) == RGN_GATE) + vunmap((void *) ((unsigned long) addr & PAGE_MASK)); +} +EXPORT_SYMBOL(iounmap); diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c new file mode 100644 index 000000000000..4c7b1f50e3b7 --- /dev/null +++ b/arch/ia64/mm/numa.c @@ -0,0 +1,80 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * This file contains NUMA specific variables and functions which are used on + * NUMA machines with contiguous memory. + * + * 2002/08/07 Erich Focht + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +/* + * The following structures are usually initialized by ACPI or + * similar mechanisms and describe the NUMA characteristics of the machine. + */ +int num_node_memblks; +struct node_memblk_s node_memblk[NR_NODE_MEMBLKS]; +struct node_cpuid_s node_cpuid[NR_CPUS] = + { [0 ... NR_CPUS-1] = { .phys_id = 0, .nid = NUMA_NO_NODE } }; + +/* + * This is a matrix with "distances" between nodes, they should be + * proportional to the memory access latency ratios. + */ +u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; + +int __node_distance(int from, int to) +{ + return slit_distance(from, to); +} +EXPORT_SYMBOL(__node_distance); + +/* Identify which cnode a physical address resides on */ +int +paddr_to_nid(unsigned long paddr) +{ + int i; + + for (i = 0; i < num_node_memblks; i++) + if (paddr >= node_memblk[i].start_paddr && + paddr < node_memblk[i].start_paddr + node_memblk[i].size) + break; + + return (i < num_node_memblks) ? node_memblk[i].nid : (num_node_memblks ? -1 : 0); +} +EXPORT_SYMBOL(paddr_to_nid); + +#if defined(CONFIG_SPARSEMEM) && defined(CONFIG_NUMA) +void numa_clear_node(int cpu) +{ + unmap_cpu_from_node(cpu, NUMA_NO_NODE); +} + +#ifdef CONFIG_MEMORY_HOTPLUG +/* + * SRAT information is stored in node_memblk[], then we can use SRAT + * information at memory-hot-add if necessary. + */ + +int memory_add_physaddr_to_nid(u64 addr) +{ + int nid = paddr_to_nid(addr); + if (nid < 0) + return 0; + return nid; +} +EXPORT_SYMBOL(memory_add_physaddr_to_nid); +#endif +#endif diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c new file mode 100644 index 000000000000..ca060e7a2a46 --- /dev/null +++ b/arch/ia64/mm/tlb.c @@ -0,0 +1,591 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * TLB support routines. + * + * Copyright (C) 1998-2001, 2003 Hewlett-Packard Co + * David Mosberger-Tang + * + * 08/02/00 A. Mallick + * Modified RID allocation for SMP + * Goutham Rao + * IPI based ptc implementation and A-step IPI implementation. + * Rohit Seth + * Ken Chen + * Christophe de Dinechin : Avoid ptc.e on memory allocation + * Copyright (C) 2007 Intel Corp + * Fenghua Yu + * Add multiple ptc.g/ptc.ga instruction support in global tlb purge. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +static struct { + u64 mask; /* mask of supported purge page-sizes */ + unsigned long max_bits; /* log2 of largest supported purge page-size */ +} purge; + +struct ia64_ctx ia64_ctx = { + .lock = __SPIN_LOCK_UNLOCKED(ia64_ctx.lock), + .next = 1, + .max_ctx = ~0U +}; + +DEFINE_PER_CPU(u8, ia64_need_tlb_flush); +DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/ +DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/ + +struct ia64_tr_entry *ia64_idtrs[NR_CPUS]; + +/* + * Initializes the ia64_ctx.bitmap array based on max_ctx+1. + * Called after cpu_init() has setup ia64_ctx.max_ctx based on + * maximum RID that is supported by boot CPU. + */ +void __init +mmu_context_init (void) +{ + ia64_ctx.bitmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, + SMP_CACHE_BYTES); + if (!ia64_ctx.bitmap) + panic("%s: Failed to allocate %u bytes\n", __func__, + (ia64_ctx.max_ctx + 1) >> 3); + ia64_ctx.flushmap = memblock_alloc((ia64_ctx.max_ctx + 1) >> 3, + SMP_CACHE_BYTES); + if (!ia64_ctx.flushmap) + panic("%s: Failed to allocate %u bytes\n", __func__, + (ia64_ctx.max_ctx + 1) >> 3); +} + +/* + * Acquire the ia64_ctx.lock before calling this function! + */ +void +wrap_mmu_context (struct mm_struct *mm) +{ + int i, cpu; + unsigned long flush_bit; + + for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { + flush_bit = xchg(&ia64_ctx.flushmap[i], 0); + ia64_ctx.bitmap[i] ^= flush_bit; + } + + /* use offset at 300 to skip daemons */ + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, 300); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + + /* + * can't call flush_tlb_all() here because of race condition + * with O(1) scheduler [EF] + */ + cpu = get_cpu(); /* prevent preemption/migration */ + for_each_online_cpu(i) + if (i != cpu) + per_cpu(ia64_need_tlb_flush, i) = 1; + put_cpu(); + local_flush_tlb_all(); +} + +/* + * Implement "spinaphores" ... like counting semaphores, but they + * spin instead of sleeping. If there are ever any other users for + * this primitive it can be moved up to a spinaphore.h header. + */ +struct spinaphore { + unsigned long ticket; + unsigned long serve; +}; + +static inline void spinaphore_init(struct spinaphore *ss, int val) +{ + ss->ticket = 0; + ss->serve = val; +} + +static inline void down_spin(struct spinaphore *ss) +{ + unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve; + + if (time_before(t, ss->serve)) + return; + + ia64_invala(); + + for (;;) { + asm volatile ("ld8.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory"); + if (time_before(t, serve)) + return; + cpu_relax(); + } +} + +static inline void up_spin(struct spinaphore *ss) +{ + ia64_fetchadd(1, &ss->serve, rel); +} + +static struct spinaphore ptcg_sem; +static u16 nptcg = 1; +static int need_ptcg_sem = 1; +static int toolatetochangeptcgsem = 0; + +/* + * Kernel parameter "nptcg=" overrides max number of concurrent global TLB + * purges which is reported from either PAL or SAL PALO. + * + * We don't have sanity checking for nptcg value. It's the user's responsibility + * for valid nptcg value on the platform. Otherwise, kernel may hang in some + * cases. + */ +static int __init +set_nptcg(char *str) +{ + int value = 0; + + get_option(&str, &value); + setup_ptcg_sem(value, NPTCG_FROM_KERNEL_PARAMETER); + + return 1; +} + +__setup("nptcg=", set_nptcg); + +/* + * Maximum number of simultaneous ptc.g purges in the system can + * be defined by PAL_VM_SUMMARY (in which case we should take + * the smallest value for any cpu in the system) or by the PAL + * override table (in which case we should ignore the value from + * PAL_VM_SUMMARY). + * + * Kernel parameter "nptcg=" overrides maximum number of simultaneous ptc.g + * purges defined in either PAL_VM_SUMMARY or PAL override table. In this case, + * we should ignore the value from either PAL_VM_SUMMARY or PAL override table. + * + * Complicating the logic here is the fact that num_possible_cpus() + * isn't fully setup until we start bringing cpus online. + */ +void +setup_ptcg_sem(int max_purges, int nptcg_from) +{ + static int kp_override; + static int palo_override; + static int firstcpu = 1; + + if (toolatetochangeptcgsem) { + if (nptcg_from == NPTCG_FROM_PAL && max_purges == 0) + BUG_ON(1 < nptcg); + else + BUG_ON(max_purges < nptcg); + return; + } + + if (nptcg_from == NPTCG_FROM_KERNEL_PARAMETER) { + kp_override = 1; + nptcg = max_purges; + goto resetsema; + } + if (kp_override) { + need_ptcg_sem = num_possible_cpus() > nptcg; + return; + } + + if (nptcg_from == NPTCG_FROM_PALO) { + palo_override = 1; + + /* In PALO max_purges == 0 really means it! */ + if (max_purges == 0) + panic("Whoa! Platform does not support global TLB purges.\n"); + nptcg = max_purges; + if (nptcg == PALO_MAX_TLB_PURGES) { + need_ptcg_sem = 0; + return; + } + goto resetsema; + } + if (palo_override) { + if (nptcg != PALO_MAX_TLB_PURGES) + need_ptcg_sem = (num_possible_cpus() > nptcg); + return; + } + + /* In PAL_VM_SUMMARY max_purges == 0 actually means 1 */ + if (max_purges == 0) max_purges = 1; + + if (firstcpu) { + nptcg = max_purges; + firstcpu = 0; + } + if (max_purges < nptcg) + nptcg = max_purges; + if (nptcg == PAL_MAX_PURGES) { + need_ptcg_sem = 0; + return; + } else + need_ptcg_sem = (num_possible_cpus() > nptcg); + +resetsema: + spinaphore_init(&ptcg_sem, max_purges); +} + +#ifdef CONFIG_SMP +static void +ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, + unsigned long end, unsigned long nbits) +{ + struct mm_struct *active_mm = current->active_mm; + + toolatetochangeptcgsem = 1; + + if (mm != active_mm) { + /* Restore region IDs for mm */ + if (mm && active_mm) { + activate_context(mm); + } else { + flush_tlb_all(); + return; + } + } + + if (need_ptcg_sem) + down_spin(&ptcg_sem); + + do { + /* + * Flush ALAT entries also. + */ + ia64_ptcga(start, (nbits << 2)); + ia64_srlz_i(); + start += (1UL << nbits); + } while (start < end); + + if (need_ptcg_sem) + up_spin(&ptcg_sem); + + if (mm != active_mm) { + activate_context(active_mm); + } +} +#endif /* CONFIG_SMP */ + +void +local_flush_tlb_all (void) +{ + unsigned long i, j, flags, count0, count1, stride0, stride1, addr; + + addr = local_cpu_data->ptce_base; + count0 = local_cpu_data->ptce_count[0]; + count1 = local_cpu_data->ptce_count[1]; + stride0 = local_cpu_data->ptce_stride[0]; + stride1 = local_cpu_data->ptce_stride[1]; + + local_irq_save(flags); + for (i = 0; i < count0; ++i) { + for (j = 0; j < count1; ++j) { + ia64_ptce(addr); + addr += stride1; + } + addr += stride0; + } + local_irq_restore(flags); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +static void +__flush_tlb_range (struct vm_area_struct *vma, unsigned long start, + unsigned long end) +{ + struct mm_struct *mm = vma->vm_mm; + unsigned long size = end - start; + unsigned long nbits; + +#ifndef CONFIG_SMP + if (mm != current->active_mm) { + mm->context = 0; + return; + } +#endif + + nbits = ia64_fls(size + 0xfff); + while (unlikely (((1UL << nbits) & purge.mask) == 0) && + (nbits < purge.max_bits)) + ++nbits; + if (nbits > purge.max_bits) + nbits = purge.max_bits; + start &= ~((1UL << nbits) - 1); + + preempt_disable(); +#ifdef CONFIG_SMP + if (mm != current->active_mm || cpumask_weight(mm_cpumask(mm)) != 1) { + ia64_global_tlb_purge(mm, start, end, nbits); + preempt_enable(); + return; + } +#endif + do { + ia64_ptcl(start, (nbits<<2)); + start += (1UL << nbits); + } while (start < end); + preempt_enable(); + ia64_srlz_i(); /* srlz.i implies srlz.d */ +} + +void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (unlikely(end - start >= 1024*1024*1024*1024UL + || REGION_NUMBER(start) != REGION_NUMBER(end - 1))) { + /* + * If we flush more than a tera-byte or across regions, we're + * probably better off just flushing the entire TLB(s). This + * should be very rare and is not worth optimizing for. + */ + flush_tlb_all(); + } else { + /* flush the address range from the tlb */ + __flush_tlb_range(vma, start, end); + /* flush the virt. page-table area mapping the addr range */ + __flush_tlb_range(vma, ia64_thash(start), ia64_thash(end)); + } +} +EXPORT_SYMBOL(flush_tlb_range); + +void ia64_tlb_init(void) +{ + ia64_ptce_info_t ptce_info; + u64 tr_pgbits; + long status; + pal_vm_info_1_u_t vm_info_1; + pal_vm_info_2_u_t vm_info_2; + int cpu = smp_processor_id(); + + if ((status = ia64_pal_vm_page_size(&tr_pgbits, &purge.mask)) != 0) { + printk(KERN_ERR "PAL_VM_PAGE_SIZE failed with status=%ld; " + "defaulting to architected purge page-sizes.\n", status); + purge.mask = 0x115557000UL; + } + purge.max_bits = ia64_fls(purge.mask); + + ia64_get_ptce(&ptce_info); + local_cpu_data->ptce_base = ptce_info.base; + local_cpu_data->ptce_count[0] = ptce_info.count[0]; + local_cpu_data->ptce_count[1] = ptce_info.count[1]; + local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; + local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; + + local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ + status = ia64_pal_vm_summary(&vm_info_1, &vm_info_2); + + if (status) { + printk(KERN_ERR "ia64_pal_vm_summary=%ld\n", status); + per_cpu(ia64_tr_num, cpu) = 8; + return; + } + per_cpu(ia64_tr_num, cpu) = vm_info_1.pal_vm_info_1_s.max_itr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > + (vm_info_1.pal_vm_info_1_s.max_dtr_entry+1)) + per_cpu(ia64_tr_num, cpu) = + vm_info_1.pal_vm_info_1_s.max_dtr_entry+1; + if (per_cpu(ia64_tr_num, cpu) > IA64_TR_ALLOC_MAX) { + static int justonce = 1; + per_cpu(ia64_tr_num, cpu) = IA64_TR_ALLOC_MAX; + if (justonce) { + justonce = 0; + printk(KERN_DEBUG "TR register number exceeds " + "IA64_TR_ALLOC_MAX!\n"); + } + } +} + +/* + * is_tr_overlap + * + * Check overlap with inserted TRs. + */ +static int is_tr_overlap(struct ia64_tr_entry *p, u64 va, u64 log_size) +{ + u64 tr_log_size; + u64 tr_end; + u64 va_rr = ia64_get_rr(va); + u64 va_rid = RR_TO_RID(va_rr); + u64 va_end = va + (1<rr)) + return 0; + tr_log_size = (p->itir & 0xff) >> 2; + tr_end = p->ifa + (1< tr_end || p->ifa > va_end) + return 0; + return 1; + +} + +/* + * ia64_insert_tr in virtual mode. Allocate a TR slot + * + * target_mask : 0x1 : itr, 0x2 : dtr, 0x3 : idtr + * + * va : virtual address. + * pte : pte entries inserted. + * log_size: range to be covered. + * + * Return value: <0 : error No. + * + * >=0 : slot number allocated for TR. + * Must be called with preemption disabled. + */ +int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size) +{ + int i, r; + unsigned long psr; + struct ia64_tr_entry *p; + int cpu = smp_processor_id(); + + if (!ia64_idtrs[cpu]) { + ia64_idtrs[cpu] = kmalloc_array(2 * IA64_TR_ALLOC_MAX, + sizeof(struct ia64_tr_entry), + GFP_KERNEL); + if (!ia64_idtrs[cpu]) + return -ENOMEM; + } + r = -EINVAL; + /*Check overlap with existing TR entries*/ + if (target_mask & 0x1) { + p = ia64_idtrs[cpu]; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG "Overlapped Entry" + "Inserted for TR Register!!\n"); + goto out; + } + } + } + if (target_mask & 0x2) { + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX; + for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu); + i++, p++) { + if (p->pte & 0x1) + if (is_tr_overlap(p, va, log_size)) { + printk(KERN_DEBUG "Overlapped Entry" + "Inserted for TR Register!!\n"); + goto out; + } + } + } + + for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) { + switch (target_mask & 0x3) { + case 1: + if (!((ia64_idtrs[cpu] + i)->pte & 0x1)) + goto found; + continue; + case 2: + if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) + goto found; + continue; + case 3: + if (!((ia64_idtrs[cpu] + i)->pte & 0x1) && + !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) + goto found; + continue; + default: + r = -EINVAL; + goto out; + } + } +found: + if (i >= per_cpu(ia64_tr_num, cpu)) + return -EBUSY; + + /*Record tr info for mca handler use!*/ + if (i > per_cpu(ia64_tr_used, cpu)) + per_cpu(ia64_tr_used, cpu) = i; + + psr = ia64_clear_ic(); + if (target_mask & 0x1) { + ia64_itr(0x1, i, va, pte, log_size); + ia64_srlz_i(); + p = ia64_idtrs[cpu] + i; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + if (target_mask & 0x2) { + ia64_itr(0x2, i, va, pte, log_size); + ia64_srlz_i(); + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i; + p->ifa = va; + p->pte = pte; + p->itir = log_size << 2; + p->rr = ia64_get_rr(va); + } + ia64_set_psr(psr); + r = i; +out: + return r; +} +EXPORT_SYMBOL_GPL(ia64_itr_entry); + +/* + * ia64_purge_tr + * + * target_mask: 0x1: purge itr, 0x2 : purge dtr, 0x3 purge idtr. + * slot: slot number to be freed. + * + * Must be called with preemption disabled. + */ +void ia64_ptr_entry(u64 target_mask, int slot) +{ + int cpu = smp_processor_id(); + int i; + struct ia64_tr_entry *p; + + if (slot < IA64_TR_ALLOC_BASE || slot >= per_cpu(ia64_tr_num, cpu)) + return; + + if (target_mask & 0x1) { + p = ia64_idtrs[cpu] + slot; + if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x1, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + if (target_mask & 0x2) { + p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot; + if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) { + p->pte = 0; + ia64_ptr(0x2, p->ifa, p->itir>>2); + ia64_srlz_i(); + } + } + + for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) { + if (((ia64_idtrs[cpu] + i)->pte & 0x1) || + ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1)) + break; + } + per_cpu(ia64_tr_used, cpu) = i; +} +EXPORT_SYMBOL_GPL(ia64_ptr_entry); diff --git a/arch/ia64/pci/Makefile b/arch/ia64/pci/Makefile new file mode 100644 index 000000000000..81ea50eeb527 --- /dev/null +++ b/arch/ia64/pci/Makefile @@ -0,0 +1,5 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# Makefile for the ia64-specific parts of the pci bus +# +obj-y := pci.o fixup.o diff --git a/arch/ia64/pci/fixup.c b/arch/ia64/pci/fixup.c new file mode 100644 index 000000000000..2bcdd7d3a1ad --- /dev/null +++ b/arch/ia64/pci/fixup.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exceptions for specific devices. Usually work-arounds for fatal design flaws. + * Derived from fixup.c of i386 tree. + */ + +#include +#include +#include +#include +#include + +/* + * Fixup to mark boot BIOS video selected by BIOS before it changes + * + * From information provided by "Jon Smirl" + * + * The standard boot ROM sequence for an x86 machine uses the BIOS + * to select an initial video card for boot display. This boot video + * card will have its BIOS copied to 0xC0000 in system RAM. + * IORESOURCE_ROM_SHADOW is used to associate the boot video + * card with this copy. On laptops this copy has to be used since + * the main ROM may be compressed or combined with another image. + * See pci_map_rom() for use of this flag. Before marking the device + * with IORESOURCE_ROM_SHADOW check if a vga_default_device is already set + * by either arch code or vga-arbitration; if so only apply the fixup to this + * already-determined primary video card. + */ + +static void pci_fixup_video(struct pci_dev *pdev) +{ + struct pci_dev *bridge; + struct pci_bus *bus; + u16 config; + struct resource *res; + + if (is_uv_system()) + return; + /* Maybe, this machine supports legacy memory map. */ + + /* Is VGA routed to us? */ + bus = pdev->bus; + while (bus) { + bridge = bus->self; + + /* + * From information provided by + * "David Miller" + * The bridge control register is valid for PCI header + * type BRIDGE, or CARDBUS. Host to PCI controllers use + * PCI header type NORMAL. + */ + if (bridge && (pci_is_bridge(bridge))) { + pci_read_config_word(bridge, PCI_BRIDGE_CONTROL, + &config); + if (!(config & PCI_BRIDGE_CTL_VGA)) + return; + } + bus = bus->parent; + } + if (!vga_default_device() || pdev == vga_default_device()) { + pci_read_config_word(pdev, PCI_COMMAND, &config); + if (config & (PCI_COMMAND_IO | PCI_COMMAND_MEMORY)) { + res = &pdev->resource[PCI_ROM_RESOURCE]; + + pci_disable_rom(pdev); + if (res->parent) + release_resource(res); + + res->start = 0xC0000; + res->end = res->start + 0x20000 - 1; + res->flags = IORESOURCE_MEM | IORESOURCE_ROM_SHADOW | + IORESOURCE_PCI_FIXED; + dev_info(&pdev->dev, "Video device with shadowed ROM at %pR\n", + res); + } + } +} +DECLARE_PCI_FIXUP_CLASS_HEADER(PCI_ANY_ID, PCI_ANY_ID, + PCI_CLASS_DISPLAY_VGA, 8, pci_fixup_video); diff --git a/arch/ia64/pci/pci.c b/arch/ia64/pci/pci.c new file mode 100644 index 000000000000..0a0328e61bef --- /dev/null +++ b/arch/ia64/pci/pci.c @@ -0,0 +1,576 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * pci.c - Low-Level PCI Access in IA-64 + * + * Derived from bios32.c of i386 tree. + * + * (c) Copyright 2002, 2005 Hewlett-Packard Development Company, L.P. + * David Mosberger-Tang + * Bjorn Helgaas + * Copyright (C) 2004 Silicon Graphics, Inc. + * + * Note: Above list of copyright holders is incomplete... + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +/* + * Low-level SAL-based PCI configuration access functions. Note that SAL + * calls are already serialized (via sal_lock), so we don't need another + * synchronization mechanism here. + */ + +#define PCI_SAL_ADDRESS(seg, bus, devfn, reg) \ + (((u64) seg << 24) | (bus << 16) | (devfn << 8) | (reg)) + +/* SAL 3.2 adds support for extended config space. */ + +#define PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg) \ + (((u64) seg << 28) | (bus << 20) | (devfn << 12) | (reg)) + +int raw_pci_read(unsigned int seg, unsigned int bus, unsigned int devfn, + int reg, int len, u32 *value) +{ + u64 addr, data = 0; + int mode, result; + + if (!value || (seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + if ((seg | reg) <= 255) { + addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); + mode = 0; + } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { + addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); + mode = 1; + } else { + return -EINVAL; + } + + result = ia64_sal_pci_config_read(addr, mode, len, &data); + if (result != 0) + return -EINVAL; + + *value = (u32) data; + return 0; +} + +int raw_pci_write(unsigned int seg, unsigned int bus, unsigned int devfn, + int reg, int len, u32 value) +{ + u64 addr; + int mode, result; + + if ((seg > 65535) || (bus > 255) || (devfn > 255) || (reg > 4095)) + return -EINVAL; + + if ((seg | reg) <= 255) { + addr = PCI_SAL_ADDRESS(seg, bus, devfn, reg); + mode = 0; + } else if (sal_revision >= SAL_VERSION_CODE(3,2)) { + addr = PCI_SAL_EXT_ADDRESS(seg, bus, devfn, reg); + mode = 1; + } else { + return -EINVAL; + } + result = ia64_sal_pci_config_write(addr, mode, len, value); + if (result != 0) + return -EINVAL; + return 0; +} + +static int pci_read(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 *value) +{ + return raw_pci_read(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +static int pci_write(struct pci_bus *bus, unsigned int devfn, int where, + int size, u32 value) +{ + return raw_pci_write(pci_domain_nr(bus), bus->number, + devfn, where, size, value); +} + +struct pci_ops pci_root_ops = { + .read = pci_read, + .write = pci_write, +}; + +struct pci_root_info { + struct acpi_pci_root_info common; + struct pci_controller controller; + struct list_head io_resources; +}; + +static unsigned int new_space(u64 phys_base, int sparse) +{ + u64 mmio_base; + int i; + + if (phys_base == 0) + return 0; /* legacy I/O port space */ + + mmio_base = (u64) ioremap(phys_base, 0); + for (i = 0; i < num_io_spaces; i++) + if (io_space[i].mmio_base == mmio_base && + io_space[i].sparse == sparse) + return i; + + if (num_io_spaces == MAX_IO_SPACES) { + pr_err("PCI: Too many IO port spaces " + "(MAX_IO_SPACES=%lu)\n", MAX_IO_SPACES); + return ~0; + } + + i = num_io_spaces++; + io_space[i].mmio_base = mmio_base; + io_space[i].sparse = sparse; + + return i; +} + +static int add_io_space(struct device *dev, struct pci_root_info *info, + struct resource_entry *entry) +{ + struct resource_entry *iospace; + struct resource *resource, *res = entry->res; + char *name; + unsigned long base, min, max, base_port; + unsigned int sparse = 0, space_nr, len; + + len = strlen(info->common.name) + 32; + iospace = resource_list_create_entry(NULL, len); + if (!iospace) { + dev_err(dev, "PCI: No memory for %s I/O port space\n", + info->common.name); + return -ENOMEM; + } + + if (res->flags & IORESOURCE_IO_SPARSE) + sparse = 1; + space_nr = new_space(entry->offset, sparse); + if (space_nr == ~0) + goto free_resource; + + name = (char *)(iospace + 1); + min = res->start - entry->offset; + max = res->end - entry->offset; + base = __pa(io_space[space_nr].mmio_base); + base_port = IO_SPACE_BASE(space_nr); + snprintf(name, len, "%s I/O Ports %08lx-%08lx", info->common.name, + base_port + min, base_port + max); + + /* + * The SDM guarantees the legacy 0-64K space is sparse, but if the + * mapping is done by the processor (not the bridge), ACPI may not + * mark it as sparse. + */ + if (space_nr == 0) + sparse = 1; + + resource = iospace->res; + resource->name = name; + resource->flags = IORESOURCE_MEM; + resource->start = base + (sparse ? IO_SPACE_SPARSE_ENCODING(min) : min); + resource->end = base + (sparse ? IO_SPACE_SPARSE_ENCODING(max) : max); + if (insert_resource(&iomem_resource, resource)) { + dev_err(dev, + "can't allocate host bridge io space resource %pR\n", + resource); + goto free_resource; + } + + entry->offset = base_port; + res->start = min + base_port; + res->end = max + base_port; + resource_list_add_tail(iospace, &info->io_resources); + + return 0; + +free_resource: + resource_list_free_entry(iospace); + return -ENOSPC; +} + +/* + * An IO port or MMIO resource assigned to a PCI host bridge may be + * consumed by the host bridge itself or available to its child + * bus/devices. The ACPI specification defines a bit (Producer/Consumer) + * to tell whether the resource is consumed by the host bridge itself, + * but firmware hasn't used that bit consistently, so we can't rely on it. + * + * On x86 and IA64 platforms, all IO port and MMIO resources are assumed + * to be available to child bus/devices except one special case: + * IO port [0xCF8-0xCFF] is consumed by the host bridge itself + * to access PCI configuration space. + * + * So explicitly filter out PCI CFG IO ports[0xCF8-0xCFF]. + */ +static bool resource_is_pcicfg_ioport(struct resource *res) +{ + return (res->flags & IORESOURCE_IO) && + res->start == 0xCF8 && res->end == 0xCFF; +} + +static int pci_acpi_root_prepare_resources(struct acpi_pci_root_info *ci) +{ + struct device *dev = &ci->bridge->dev; + struct pci_root_info *info; + struct resource *res; + struct resource_entry *entry, *tmp; + int status; + + status = acpi_pci_probe_root_resources(ci); + if (status > 0) { + info = container_of(ci, struct pci_root_info, common); + resource_list_for_each_entry_safe(entry, tmp, &ci->resources) { + res = entry->res; + if (res->flags & IORESOURCE_MEM) { + /* + * HP's firmware has a hack to work around a + * Windows bug. Ignore these tiny memory ranges. + */ + if (resource_size(res) <= 16) { + resource_list_del(entry); + insert_resource(&iomem_resource, + entry->res); + resource_list_add_tail(entry, + &info->io_resources); + } + } else if (res->flags & IORESOURCE_IO) { + if (resource_is_pcicfg_ioport(entry->res)) + resource_list_destroy_entry(entry); + else if (add_io_space(dev, info, entry)) + resource_list_destroy_entry(entry); + } + } + } + + return status; +} + +static void pci_acpi_root_release_info(struct acpi_pci_root_info *ci) +{ + struct pci_root_info *info; + struct resource_entry *entry, *tmp; + + info = container_of(ci, struct pci_root_info, common); + resource_list_for_each_entry_safe(entry, tmp, &info->io_resources) { + release_resource(entry->res); + resource_list_destroy_entry(entry); + } + kfree(info); +} + +static struct acpi_pci_root_ops pci_acpi_root_ops = { + .pci_ops = &pci_root_ops, + .release_info = pci_acpi_root_release_info, + .prepare_resources = pci_acpi_root_prepare_resources, +}; + +struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root) +{ + struct acpi_device *device = root->device; + struct pci_root_info *info; + + info = kzalloc(sizeof(*info), GFP_KERNEL); + if (!info) { + dev_err(&device->dev, + "pci_bus %04x:%02x: ignored (out of memory)\n", + root->segment, (int)root->secondary.start); + return NULL; + } + + info->controller.segment = root->segment; + info->controller.companion = device; + info->controller.node = acpi_get_node(device->handle); + INIT_LIST_HEAD(&info->io_resources); + return acpi_pci_root_create(root, &pci_acpi_root_ops, + &info->common, &info->controller); +} + +int pcibios_root_bridge_prepare(struct pci_host_bridge *bridge) +{ + /* + * We pass NULL as parent to pci_create_root_bus(), so if it is not NULL + * here, pci_create_root_bus() has been called by someone else and + * sysdata is likely to be different from what we expect. Let it go in + * that case. + */ + if (!bridge->dev.parent) { + struct pci_controller *controller = bridge->bus->sysdata; + ACPI_COMPANION_SET(&bridge->dev, controller->companion); + } + return 0; +} + +void pcibios_fixup_device_resources(struct pci_dev *dev) +{ + int idx; + + if (!dev->bus) + return; + + for (idx = 0; idx < PCI_BRIDGE_RESOURCES; idx++) { + struct resource *r = &dev->resource[idx]; + + if (!r->flags || r->parent || !r->start) + continue; + + pci_claim_resource(dev, idx); + } +} +EXPORT_SYMBOL_GPL(pcibios_fixup_device_resources); + +static void pcibios_fixup_bridge_resources(struct pci_dev *dev) +{ + int idx; + + if (!dev->bus) + return; + + for (idx = PCI_BRIDGE_RESOURCES; idx < PCI_NUM_RESOURCES; idx++) { + struct resource *r = &dev->resource[idx]; + + if (!r->flags || r->parent || !r->start) + continue; + + pci_claim_bridge_resource(dev, idx); + } +} + +/* + * Called after each bus is probed, but before its children are examined. + */ +void pcibios_fixup_bus(struct pci_bus *b) +{ + struct pci_dev *dev; + + if (b->self) { + pci_read_bridge_bases(b); + pcibios_fixup_bridge_resources(b->self); + } + list_for_each_entry(dev, &b->devices, bus_list) + pcibios_fixup_device_resources(dev); +} + +void pcibios_add_bus(struct pci_bus *bus) +{ + acpi_pci_add_bus(bus); +} + +void pcibios_remove_bus(struct pci_bus *bus) +{ + acpi_pci_remove_bus(bus); +} + +void pcibios_set_master (struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} + +int +pcibios_enable_device (struct pci_dev *dev, int mask) +{ + int ret; + + ret = pci_enable_resources(dev, mask); + if (ret < 0) + return ret; + + if (!pci_dev_msi_enabled(dev)) + return acpi_pci_irq_enable(dev); + return 0; +} + +void +pcibios_disable_device (struct pci_dev *dev) +{ + BUG_ON(atomic_read(&dev->enable_cnt)); + if (!pci_dev_msi_enabled(dev)) + acpi_pci_irq_disable(dev); +} + +/** + * pci_get_legacy_mem - generic legacy mem routine + * @bus: bus to get legacy memory base address for + * + * Find the base of legacy memory for @bus. This is typically the first + * megabyte of bus address space for @bus or is simply 0 on platforms whose + * chipsets support legacy I/O and memory routing. Returns the base address + * or an error pointer if an error occurred. + * + * This is the ia64 generic version of this routine. Other platforms + * are free to override it with a machine vector. + */ +char *pci_get_legacy_mem(struct pci_bus *bus) +{ + return (char *)__IA64_UNCACHED_OFFSET; +} + +/** + * pci_mmap_legacy_page_range - map legacy memory space to userland + * @bus: bus whose legacy space we're mapping + * @vma: vma passed in by mmap + * + * Map legacy memory space for this device back to userspace using a machine + * vector to get the base address. + */ +int +pci_mmap_legacy_page_range(struct pci_bus *bus, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state) +{ + unsigned long size = vma->vm_end - vma->vm_start; + pgprot_t prot; + char *addr; + + /* We only support mmap'ing of legacy memory space */ + if (mmap_state != pci_mmap_mem) + return -ENOSYS; + + /* + * Avoid attribute aliasing. See Documentation/arch/ia64/aliasing.rst + * for more details. + */ + if (!valid_mmap_phys_addr_range(vma->vm_pgoff, size)) + return -EINVAL; + prot = phys_mem_access_prot(NULL, vma->vm_pgoff, size, + vma->vm_page_prot); + + addr = pci_get_legacy_mem(bus); + if (IS_ERR(addr)) + return PTR_ERR(addr); + + vma->vm_pgoff += (unsigned long)addr >> PAGE_SHIFT; + vma->vm_page_prot = prot; + + if (remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff, + size, vma->vm_page_prot)) + return -EAGAIN; + + return 0; +} + +/** + * pci_legacy_read - read from legacy I/O space + * @bus: bus to read + * @port: legacy port value + * @val: caller allocated storage for returned value + * @size: number of bytes to read + * + * Simply reads @size bytes from @port and puts the result in @val. + * + * Again, this (and the write routine) are generic versions that can be + * overridden by the platform. This is necessary on platforms that don't + * support legacy I/O routing or that hard fail on legacy I/O timeouts. + */ +int pci_legacy_read(struct pci_bus *bus, u16 port, u32 *val, u8 size) +{ + int ret = size; + + switch (size) { + case 1: + *val = inb(port); + break; + case 2: + *val = inw(port); + break; + case 4: + *val = inl(port); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * pci_legacy_write - perform a legacy I/O write + * @bus: bus pointer + * @port: port to write + * @val: value to write + * @size: number of bytes to write from @val + * + * Simply writes @size bytes of @val to @port. + */ +int pci_legacy_write(struct pci_bus *bus, u16 port, u32 val, u8 size) +{ + int ret = size; + + switch (size) { + case 1: + outb(val, port); + break; + case 2: + outw(val, port); + break; + case 4: + outl(val, port); + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +/** + * set_pci_cacheline_size - determine cacheline size for PCI devices + * + * We want to use the line-size of the outer-most cache. We assume + * that this line-size is the same for all CPUs. + * + * Code mostly taken from arch/ia64/kernel/palinfo.c:cache_info(). + */ +static void __init set_pci_dfl_cacheline_size(void) +{ + unsigned long levels, unique_caches; + long status; + pal_cache_config_info_t cci; + + status = ia64_pal_cache_summary(&levels, &unique_caches); + if (status != 0) { + pr_err("%s: ia64_pal_cache_summary() failed " + "(status=%ld)\n", __func__, status); + return; + } + + status = ia64_pal_cache_config_info(levels - 1, + /* cache_type (data_or_unified)= */ 2, &cci); + if (status != 0) { + pr_err("%s: ia64_pal_cache_config_info() failed " + "(status=%ld)\n", __func__, status); + return; + } + pci_dfl_cache_line_size = (1 << cci.pcci_line_size) / 4; +} + +static int __init pcibios_init(void) +{ + set_pci_dfl_cacheline_size(); + return 0; +} + +subsys_initcall(pcibios_init); diff --git a/arch/ia64/scripts/check-gas b/arch/ia64/scripts/check-gas new file mode 100755 index 000000000000..787cf9b6b04a --- /dev/null +++ b/arch/ia64/scripts/check-gas @@ -0,0 +1,16 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +dir=$(dirname $0) +CC=$1 +OBJDUMP=$2 +tmp=${TMPDIR:-/tmp} +out=$tmp/out$$.o +$CC -c $dir/check-gas-asm.S -o $out +res=$($OBJDUMP -r --section .data $out | fgrep 00004 | tr -s ' ' |cut -f3 -d' ') +rm -f $out +if [ $res != ".text" ]; then + echo buggy +else + echo good +fi +exit 0 diff --git a/arch/ia64/scripts/check-gas-asm.S b/arch/ia64/scripts/check-gas-asm.S new file mode 100644 index 000000000000..010e1d227e5d --- /dev/null +++ b/arch/ia64/scripts/check-gas-asm.S @@ -0,0 +1,2 @@ +[1:] nop 0 + .xdata4 ".data", 0, 1b-. diff --git a/arch/ia64/scripts/check-model.c b/arch/ia64/scripts/check-model.c new file mode 100644 index 000000000000..e1d4e86e3d63 --- /dev/null +++ b/arch/ia64/scripts/check-model.c @@ -0,0 +1 @@ +int __attribute__ ((__model__ (__small__))) x; diff --git a/arch/ia64/scripts/check-segrel.S b/arch/ia64/scripts/check-segrel.S new file mode 100644 index 000000000000..65d6378adaaa --- /dev/null +++ b/arch/ia64/scripts/check-segrel.S @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .rodata + data4 @segrel(start) + .data +start: diff --git a/arch/ia64/scripts/check-segrel.lds b/arch/ia64/scripts/check-segrel.lds new file mode 100644 index 000000000000..c385d246e458 --- /dev/null +++ b/arch/ia64/scripts/check-segrel.lds @@ -0,0 +1,13 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +SECTIONS { + . = SIZEOF_HEADERS; + .rodata : { *(.rodata) } :ro + .note : { *(.note*) } + . = 0xa0000; + .data : { *(.data) } :dat + /DISCARD/ : { *(*) } +} +PHDRS { + ro PT_LOAD FILEHDR PHDRS; + dat PT_LOAD; +} diff --git a/arch/ia64/scripts/check-serialize.S b/arch/ia64/scripts/check-serialize.S new file mode 100644 index 000000000000..0400c106806c --- /dev/null +++ b/arch/ia64/scripts/check-serialize.S @@ -0,0 +1,2 @@ + .serialize.data + .serialize.instruction diff --git a/arch/ia64/scripts/check-text-align.S b/arch/ia64/scripts/check-text-align.S new file mode 100644 index 000000000000..107fa1c88c2e --- /dev/null +++ b/arch/ia64/scripts/check-text-align.S @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + .proc foo + .prologue +foo: .save rp, r2 + nop 0 + .align 64 + .endp foo diff --git a/arch/ia64/scripts/toolchain-flags b/arch/ia64/scripts/toolchain-flags new file mode 100755 index 000000000000..12dff5c981cf --- /dev/null +++ b/arch/ia64/scripts/toolchain-flags @@ -0,0 +1,54 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Check whether linker can handle cross-segment @segrel(): +# +CPPFLAGS="" +CC=$1 +OBJDUMP=$2 +READELF=$3 +dir=$(dirname $0) +tmp=${TMPDIR:-/tmp} +out=$tmp/out$$ + +# Check whether cross-segment segment-relative relocs work fine. We need +# that for building the gate DSO: + +$CC -nostdlib -static -Wl,-T$dir/check-segrel.lds $dir/check-segrel.S -o $out +res=$($OBJDUMP --full --section .rodata $out | fgrep 000 | cut -f3 -d' ') +rm -f $out +if [ $res != 00000a00 ]; then + CPPFLAGS="$CPPFLAGS -DHAVE_BUGGY_SEGREL" + cat >&2 <&1 | grep __model__ | grep -q attrib +then + CPPFLAGS="$CPPFLAGS -DHAVE_MODEL_SMALL_ATTRIBUTE" +fi +rm -f $out + +# Check whether assembler supports .serialize.{data,instruction} directive. + +$CC -c $dir/check-serialize.S -o $out 2>/dev/null +res=$? +rm -f $out +if [ $res -eq 0 ]; then + CPPFLAGS="$CPPFLAGS -DHAVE_SERIALIZE_DIRECTIVE" +fi + +echo $CPPFLAGS diff --git a/arch/ia64/scripts/unwcheck.py b/arch/ia64/scripts/unwcheck.py new file mode 100644 index 000000000000..9581742f0db2 --- /dev/null +++ b/arch/ia64/scripts/unwcheck.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +# +# Usage: unwcheck.py FILE +# +# This script checks the unwind info of each function in file FILE +# and verifies that the sum of the region-lengths matches the total +# length of the function. +# +# Based on a shell/awk script originally written by Harish Patil, +# which was converted to Perl by Matthew Chapman, which was converted +# to Python by David Mosberger. +# +import os +import re +import sys + +if len(sys.argv) != 2: + print("Usage: %s FILE" % sys.argv[0]) + sys.exit(2) + +readelf = os.getenv("READELF", "readelf") + +start_pattern = re.compile("<([^>]*)>: \[0x([0-9a-f]+)-0x([0-9a-f]+)\]") +rlen_pattern = re.compile(".*rlen=([0-9]+)") + +def check_func (func, slots, rlen_sum): + if slots != rlen_sum: + global num_errors + num_errors += 1 + if not func: func = "[%#x-%#x]" % (start, end) + print("ERROR: %s: %lu slots, total region length = %lu" % (func, slots, rlen_sum)) + return + +num_funcs = 0 +num_errors = 0 +func = False +slots = 0 +rlen_sum = 0 +for line in os.popen("%s -u %s" % (readelf, sys.argv[1])): + m = start_pattern.match(line) + if m: + check_func(func, slots, rlen_sum) + + func = m.group(1) + start = int(m.group(2), 16) + end = int(m.group(3), 16) + slots = 3 * (end - start) / 16 + rlen_sum = 0 + num_funcs += 1 + else: + m = rlen_pattern.match(line) + if m: + rlen_sum += int(m.group(1)) +check_func(func, slots, rlen_sum) + +if num_errors == 0: + print("No errors detected in %u functions." % num_funcs) +else: + if num_errors > 1: + err="errors" + else: + err="error" + print("%u %s detected in %u functions." % (num_errors, err, num_funcs)) + sys.exit(1) diff --git a/arch/ia64/uv/Makefile b/arch/ia64/uv/Makefile new file mode 100644 index 000000000000..aa9f91947c49 --- /dev/null +++ b/arch/ia64/uv/Makefile @@ -0,0 +1,12 @@ +# arch/ia64/uv/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2008 Silicon Graphics, Inc. All Rights Reserved. +# +# Makefile for the sn uv subplatform +# + +obj-y += kernel/ diff --git a/arch/ia64/uv/kernel/Makefile b/arch/ia64/uv/kernel/Makefile new file mode 100644 index 000000000000..297196578d19 --- /dev/null +++ b/arch/ia64/uv/kernel/Makefile @@ -0,0 +1,12 @@ +# arch/ia64/uv/kernel/Makefile +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2008 Silicon Graphics, Inc. All Rights Reserved. +# + +ccflags-y := -Iarch/ia64/sn/include + +obj-y += setup.o diff --git a/arch/ia64/uv/kernel/setup.c b/arch/ia64/uv/kernel/setup.c new file mode 100644 index 000000000000..bb025486d791 --- /dev/null +++ b/arch/ia64/uv/kernel/setup.c @@ -0,0 +1,120 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * SGI UV Core Functions + * + * Copyright (C) 2008 Silicon Graphics, Inc. All rights reserved. + */ + +#include +#include +#include +#include +#include +#include +#include + +bool ia64_is_uv; +EXPORT_SYMBOL_GPL(ia64_is_uv); + +DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); +EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); + +struct redir_addr { + unsigned long redirect; + unsigned long alias; +}; + +#define DEST_SHIFT UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR_DEST_BASE_SHFT + +static __initdata struct redir_addr redir_addrs[] = { + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR, UVH_SI_ALIAS0_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR, UVH_SI_ALIAS1_OVERLAY_CONFIG}, + {UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR, UVH_SI_ALIAS2_OVERLAY_CONFIG}, +}; + +static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) +{ + union uvh_si_alias0_overlay_config_u alias; + union uvh_rh_gam_alias210_redirect_config_2_mmr_u redirect; + int i; + + for (i = 0; i < ARRAY_SIZE(redir_addrs); i++) { + alias.v = uv_read_local_mmr(redir_addrs[i].alias); + if (alias.s.base == 0) { + *size = (1UL << alias.s.m_alias); + redirect.v = uv_read_local_mmr(redir_addrs[i].redirect); + *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; + return; + } + } + BUG(); +} + +void __init uv_probe_system_type(void) +{ + struct acpi_table_rsdp *rsdp; + struct acpi_table_xsdt *xsdt; + + if (efi.acpi20 == EFI_INVALID_TABLE_ADDR) { + pr_err("ACPI 2.0 RSDP not found.\n"); + return; + } + + rsdp = (struct acpi_table_rsdp *)__va(efi.acpi20); + if (strncmp(rsdp->signature, ACPI_SIG_RSDP, sizeof(ACPI_SIG_RSDP) - 1)) { + pr_err("ACPI 2.0 RSDP signature incorrect.\n"); + return; + } + + xsdt = (struct acpi_table_xsdt *)__va(rsdp->xsdt_physical_address); + if (strncmp(xsdt->header.signature, ACPI_SIG_XSDT, + sizeof(ACPI_SIG_XSDT) - 1)) { + pr_err("ACPI 2.0 XSDT signature incorrect.\n"); + return; + } + + if (!strcmp(xsdt->header.oem_id, "SGI") && + !strcmp(xsdt->header.oem_table_id + 4, "UV")) + ia64_is_uv = true; +} + +void __init uv_setup(char **cmdline_p) +{ + union uvh_si_addr_map_config_u m_n_config; + union uvh_node_id_u node_id; + unsigned long gnode_upper; + int nid, cpu, m_val, n_val; + unsigned long mmr_base, lowmem_redir_base, lowmem_redir_size; + + get_lowmem_redirect(&lowmem_redir_base, &lowmem_redir_size); + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); + mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & + ~UV_MMR_ENABLE; + + m_val = m_n_config.s.m_skt; + n_val = m_n_config.s.n_skt; + printk(KERN_DEBUG "UV: global MMR base 0x%lx\n", mmr_base); + + gnode_upper = (((unsigned long)node_id.s.node_id) & + ~((1 << n_val) - 1)) << m_val; + + for_each_present_cpu(cpu) { + nid = cpu_to_node(cpu); + uv_cpu_hub_info(cpu)->lowmem_remap_base = lowmem_redir_base; + uv_cpu_hub_info(cpu)->lowmem_remap_top = + lowmem_redir_base + lowmem_redir_size; + uv_cpu_hub_info(cpu)->m_val = m_val; + uv_cpu_hub_info(cpu)->n_val = n_val; + uv_cpu_hub_info(cpu)->pnode_mask = (1 << n_val) -1; + uv_cpu_hub_info(cpu)->gpa_mask = (1 << (m_val + n_val)) - 1; + uv_cpu_hub_info(cpu)->gnode_upper = gnode_upper; + uv_cpu_hub_info(cpu)->global_mmr_base = mmr_base; + uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ + printk(KERN_DEBUG "UV cpu %d, nid %d\n", cpu, nid); + } +} + diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c index 762e4d09aacf..0bee7ca9a77c 100644 --- a/arch/powerpc/kexec/core_64.c +++ b/arch/powerpc/kexec/core_64.c @@ -283,7 +283,8 @@ static void kexec_prepare_cpus(void) * We could use a smaller stack if we don't care about anything using * current, but that audit has not been performed. */ -static union thread_union kexec_stack = { }; +static union thread_union kexec_stack __init_task_data = + { }; /* * For similar reasons to the stack above, the kexecing CPU needs to be on a diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 3c3f8037ebed..f09ee205f5a9 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -262,7 +262,7 @@ config ACPI_CPU_FREQ_PSS config ACPI_PROCESSOR_CSTATE def_bool y depends on ACPI_PROCESSOR - depends on X86 + depends on IA64 || X86 config ACPI_PROCESSOR_IDLE bool @@ -286,9 +286,9 @@ config ACPI_CPPC_LIB config ACPI_PROCESSOR tristate "Processor" - depends on X86 || ARM64 || LOONGARCH + depends on X86 || IA64 || ARM64 || LOONGARCH select ACPI_PROCESSOR_IDLE - select ACPI_CPU_FREQ_PSS if X86 || LOONGARCH + select ACPI_CPU_FREQ_PSS if X86 || IA64 || LOONGARCH select THERMAL default y help diff --git a/drivers/acpi/numa/Kconfig b/drivers/acpi/numa/Kconfig index 849c2bd820b9..39b1f34c21df 100644 --- a/drivers/acpi/numa/Kconfig +++ b/drivers/acpi/numa/Kconfig @@ -2,8 +2,8 @@ config ACPI_NUMA bool "NUMA support" depends on NUMA - depends on (X86 || ARM64 || LOONGARCH) - default y if ARM64 + depends on (X86 || IA64 || ARM64 || LOONGARCH) + default y if IA64 || ARM64 config ACPI_HMAT bool "ACPI Heterogeneous Memory Attribute Table Support" diff --git a/drivers/acpi/osl.c b/drivers/acpi/osl.c index 70af3fbbebe5..78a81969d90e 100644 --- a/drivers/acpi/osl.c +++ b/drivers/acpi/osl.c @@ -276,7 +276,7 @@ acpi_map_lookup_virt(void __iomem *virt, acpi_size size) return NULL; } -#if defined(CONFIG_ARM64) || defined(CONFIG_RISCV) +#if defined(CONFIG_IA64) || defined(CONFIG_ARM64) || defined(CONFIG_RISCV) /* ioremap will take care of cache attributes */ #define should_use_kmap(pfn) 0 #else diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 7c8dd0abcfdf..625af75833fc 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -348,7 +348,7 @@ config DEVPORT device is similar to /dev/mem, but for I/O ports. config HPET - bool "HPET - High Precision Event Timer" if X86 + bool "HPET - High Precision Event Timer" if (X86 || IA64) default n depends on ACPI help @@ -377,7 +377,7 @@ config HPET_MMAP_DEFAULT config HANGCHECK_TIMER tristate "Hangcheck timer" - depends on X86 || PPC64 || S390 + depends on X86 || IA64 || PPC64 || S390 help The hangcheck-timer module detects when the system has gone out to lunch past a certain margin. It can reboot the system diff --git a/drivers/char/Makefile b/drivers/char/Makefile index e9b360cdc99a..c5f532e412f1 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_TTY_PRINTK) += ttyprintk.o obj-y += misc.o obj-$(CONFIG_ATARI_DSP56K) += dsp56k.o obj-$(CONFIG_VIRTIO_CONSOLE) += virtio_console.o +obj-$(CONFIG_MSPEC) += mspec.o obj-$(CONFIG_UV_MMTIMER) += uv_mmtimer.o obj-$(CONFIG_IBM_BSR) += bsr.o diff --git a/drivers/char/agp/Kconfig b/drivers/char/agp/Kconfig index c47eb7bf06d4..4f501e4842ab 100644 --- a/drivers/char/agp/Kconfig +++ b/drivers/char/agp/Kconfig @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 menuconfig AGP tristate "/dev/agpgart (AGP Support)" - depends on ALPHA || PARISC || PPC || X86 + depends on ALPHA || IA64 || PARISC || PPC || X86 depends on PCI help AGP (Accelerated Graphics Port) is a bus system mainly used to @@ -109,6 +109,20 @@ config AGP_VIA This option gives you AGP support for the GLX component of X on VIA MVP3/Apollo Pro chipsets. +config AGP_I460 + tristate "Intel 460GX chipset support" + depends on AGP && IA64 + help + This option gives you AGP GART support for the Intel 460GX chipset + for IA64 processors. + +config AGP_HP_ZX1 + tristate "HP ZX1 chipset AGP support" + depends on AGP && IA64 + help + This option gives you AGP GART support for the HP ZX1 chipset + for IA64 processors. + config AGP_PARISC tristate "HP Quicksilver AGP support" depends on AGP && PARISC && 64BIT && IOMMU_SBA diff --git a/drivers/char/agp/Makefile b/drivers/char/agp/Makefile index 43b09cf193bb..04e294642e7c 100644 --- a/drivers/char/agp/Makefile +++ b/drivers/char/agp/Makefile @@ -8,7 +8,9 @@ obj-$(CONFIG_AGP_AMD) += amd-k7-agp.o obj-$(CONFIG_AGP_AMD64) += amd64-agp.o obj-$(CONFIG_AGP_ALPHA_CORE) += alpha-agp.o obj-$(CONFIG_AGP_EFFICEON) += efficeon-agp.o +obj-$(CONFIG_AGP_HP_ZX1) += hp-agp.o obj-$(CONFIG_AGP_PARISC) += parisc-agp.o +obj-$(CONFIG_AGP_I460) += i460-agp.o obj-$(CONFIG_AGP_INTEL) += intel-agp.o obj-$(CONFIG_INTEL_GTT) += intel-gtt.o obj-$(CONFIG_AGP_NVIDIA) += nvidia-agp.o diff --git a/drivers/char/agp/hp-agp.c b/drivers/char/agp/hp-agp.c new file mode 100644 index 000000000000..84d9adbb62f6 --- /dev/null +++ b/drivers/char/agp/hp-agp.c @@ -0,0 +1,550 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * HP zx1 AGPGART routines. + * + * (c) Copyright 2002, 2003 Hewlett-Packard Development Company, L.P. + * Bjorn Helgaas + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "agp.h" + +#define HP_ZX1_IOC_OFFSET 0x1000 /* ACPI reports SBA, we want IOC */ + +/* HP ZX1 IOC registers */ +#define HP_ZX1_IBASE 0x300 +#define HP_ZX1_IMASK 0x308 +#define HP_ZX1_PCOM 0x310 +#define HP_ZX1_TCNFG 0x318 +#define HP_ZX1_PDIR_BASE 0x320 + +#define HP_ZX1_IOVA_BASE GB(1UL) +#define HP_ZX1_IOVA_SIZE GB(1UL) +#define HP_ZX1_GART_SIZE (HP_ZX1_IOVA_SIZE / 2) +#define HP_ZX1_SBA_IOMMU_COOKIE 0x0000badbadc0ffeeUL + +#define HP_ZX1_PDIR_VALID_BIT 0x8000000000000000UL +#define HP_ZX1_IOVA_TO_PDIR(va) ((va - hp_private.iova_base) >> hp_private.io_tlb_shift) + +#define AGP8X_MODE_BIT 3 +#define AGP8X_MODE (1 << AGP8X_MODE_BIT) + +/* AGP bridge need not be PCI device, but DRM thinks it is. */ +static struct pci_dev fake_bridge_dev; + +static int hp_zx1_gart_found; + +static struct aper_size_info_fixed hp_zx1_sizes[] = +{ + {0, 0, 0}, /* filled in by hp_zx1_fetch_size() */ +}; + +static struct gatt_mask hp_zx1_masks[] = +{ + {.mask = HP_ZX1_PDIR_VALID_BIT, .type = 0} +}; + +static struct _hp_private { + volatile u8 __iomem *ioc_regs; + volatile u8 __iomem *lba_regs; + int lba_cap_offset; + u64 *io_pdir; // PDIR for entire IOVA + u64 *gatt; // PDIR just for GART (subset of above) + u64 gatt_entries; + u64 iova_base; + u64 gart_base; + u64 gart_size; + u64 io_pdir_size; + int io_pdir_owner; // do we own it, or share it with sba_iommu? + int io_page_size; + int io_tlb_shift; + int io_tlb_ps; // IOC ps config + int io_pages_per_kpage; +} hp_private; + +static int __init hp_zx1_ioc_shared(void) +{ + struct _hp_private *hp = &hp_private; + + printk(KERN_INFO PFX "HP ZX1 IOC: IOPDIR shared with sba_iommu\n"); + + /* + * IOC already configured by sba_iommu module; just use + * its setup. We assume: + * - IOVA space is 1Gb in size + * - first 512Mb is IOMMU, second 512Mb is GART + */ + hp->io_tlb_ps = readq(hp->ioc_regs+HP_ZX1_TCNFG); + switch (hp->io_tlb_ps) { + case 0: hp->io_tlb_shift = 12; break; + case 1: hp->io_tlb_shift = 13; break; + case 2: hp->io_tlb_shift = 14; break; + case 3: hp->io_tlb_shift = 16; break; + default: + printk(KERN_ERR PFX "Invalid IOTLB page size " + "configuration 0x%x\n", hp->io_tlb_ps); + hp->gatt = NULL; + hp->gatt_entries = 0; + return -ENODEV; + } + hp->io_page_size = 1 << hp->io_tlb_shift; + hp->io_pages_per_kpage = PAGE_SIZE / hp->io_page_size; + + hp->iova_base = readq(hp->ioc_regs+HP_ZX1_IBASE) & ~0x1; + hp->gart_base = hp->iova_base + HP_ZX1_IOVA_SIZE - HP_ZX1_GART_SIZE; + + hp->gart_size = HP_ZX1_GART_SIZE; + hp->gatt_entries = hp->gart_size / hp->io_page_size; + + hp->io_pdir = phys_to_virt(readq(hp->ioc_regs+HP_ZX1_PDIR_BASE)); + hp->gatt = &hp->io_pdir[HP_ZX1_IOVA_TO_PDIR(hp->gart_base)]; + + if (hp->gatt[0] != HP_ZX1_SBA_IOMMU_COOKIE) { + /* Normal case when no AGP device in system */ + hp->gatt = NULL; + hp->gatt_entries = 0; + printk(KERN_ERR PFX "No reserved IO PDIR entry found; " + "GART disabled\n"); + return -ENODEV; + } + + return 0; +} + +static int __init +hp_zx1_ioc_owner (void) +{ + struct _hp_private *hp = &hp_private; + + printk(KERN_INFO PFX "HP ZX1 IOC: IOPDIR dedicated to GART\n"); + + /* + * Select an IOV page size no larger than system page size. + */ + if (PAGE_SIZE >= KB(64)) { + hp->io_tlb_shift = 16; + hp->io_tlb_ps = 3; + } else if (PAGE_SIZE >= KB(16)) { + hp->io_tlb_shift = 14; + hp->io_tlb_ps = 2; + } else if (PAGE_SIZE >= KB(8)) { + hp->io_tlb_shift = 13; + hp->io_tlb_ps = 1; + } else { + hp->io_tlb_shift = 12; + hp->io_tlb_ps = 0; + } + hp->io_page_size = 1 << hp->io_tlb_shift; + hp->io_pages_per_kpage = PAGE_SIZE / hp->io_page_size; + + hp->iova_base = HP_ZX1_IOVA_BASE; + hp->gart_size = HP_ZX1_GART_SIZE; + hp->gart_base = hp->iova_base + HP_ZX1_IOVA_SIZE - hp->gart_size; + + hp->gatt_entries = hp->gart_size / hp->io_page_size; + hp->io_pdir_size = (HP_ZX1_IOVA_SIZE / hp->io_page_size) * sizeof(u64); + + return 0; +} + +static int __init +hp_zx1_ioc_init (u64 hpa) +{ + struct _hp_private *hp = &hp_private; + + hp->ioc_regs = ioremap(hpa, 1024); + if (!hp->ioc_regs) + return -ENOMEM; + + /* + * If the IOTLB is currently disabled, we can take it over. + * Otherwise, we have to share with sba_iommu. + */ + hp->io_pdir_owner = (readq(hp->ioc_regs+HP_ZX1_IBASE) & 0x1) == 0; + + if (hp->io_pdir_owner) + return hp_zx1_ioc_owner(); + + return hp_zx1_ioc_shared(); +} + +static int +hp_zx1_lba_find_capability (volatile u8 __iomem *hpa, int cap) +{ + u16 status; + u8 pos, id; + int ttl = 48; + + status = readw(hpa+PCI_STATUS); + if (!(status & PCI_STATUS_CAP_LIST)) + return 0; + pos = readb(hpa+PCI_CAPABILITY_LIST); + while (ttl-- && pos >= 0x40) { + pos &= ~3; + id = readb(hpa+pos+PCI_CAP_LIST_ID); + if (id == 0xff) + break; + if (id == cap) + return pos; + pos = readb(hpa+pos+PCI_CAP_LIST_NEXT); + } + return 0; +} + +static int __init +hp_zx1_lba_init (u64 hpa) +{ + struct _hp_private *hp = &hp_private; + int cap; + + hp->lba_regs = ioremap(hpa, 256); + if (!hp->lba_regs) + return -ENOMEM; + + hp->lba_cap_offset = hp_zx1_lba_find_capability(hp->lba_regs, PCI_CAP_ID_AGP); + + cap = readl(hp->lba_regs+hp->lba_cap_offset) & 0xff; + if (cap != PCI_CAP_ID_AGP) { + printk(KERN_ERR PFX "Invalid capability ID 0x%02x at 0x%x\n", + cap, hp->lba_cap_offset); + iounmap(hp->lba_regs); + return -ENODEV; + } + + return 0; +} + +static int +hp_zx1_fetch_size(void) +{ + int size; + + size = hp_private.gart_size / MB(1); + hp_zx1_sizes[0].size = size; + agp_bridge->current_size = (void *) &hp_zx1_sizes[0]; + return size; +} + +static int +hp_zx1_configure (void) +{ + struct _hp_private *hp = &hp_private; + + agp_bridge->gart_bus_addr = hp->gart_base; + agp_bridge->capndx = hp->lba_cap_offset; + agp_bridge->mode = readl(hp->lba_regs+hp->lba_cap_offset+PCI_AGP_STATUS); + + if (hp->io_pdir_owner) { + writel(virt_to_phys(hp->io_pdir), hp->ioc_regs+HP_ZX1_PDIR_BASE); + readl(hp->ioc_regs+HP_ZX1_PDIR_BASE); + writel(hp->io_tlb_ps, hp->ioc_regs+HP_ZX1_TCNFG); + readl(hp->ioc_regs+HP_ZX1_TCNFG); + writel((unsigned int)(~(HP_ZX1_IOVA_SIZE-1)), hp->ioc_regs+HP_ZX1_IMASK); + readl(hp->ioc_regs+HP_ZX1_IMASK); + writel(hp->iova_base|1, hp->ioc_regs+HP_ZX1_IBASE); + readl(hp->ioc_regs+HP_ZX1_IBASE); + writel(hp->iova_base|ilog2(HP_ZX1_IOVA_SIZE), hp->ioc_regs+HP_ZX1_PCOM); + readl(hp->ioc_regs+HP_ZX1_PCOM); + } + + return 0; +} + +static void +hp_zx1_cleanup (void) +{ + struct _hp_private *hp = &hp_private; + + if (hp->ioc_regs) { + if (hp->io_pdir_owner) { + writeq(0, hp->ioc_regs+HP_ZX1_IBASE); + readq(hp->ioc_regs+HP_ZX1_IBASE); + } + iounmap(hp->ioc_regs); + } + if (hp->lba_regs) + iounmap(hp->lba_regs); +} + +static void +hp_zx1_tlbflush (struct agp_memory *mem) +{ + struct _hp_private *hp = &hp_private; + + writeq(hp->gart_base | ilog2(hp->gart_size), hp->ioc_regs+HP_ZX1_PCOM); + readq(hp->ioc_regs+HP_ZX1_PCOM); +} + +static int +hp_zx1_create_gatt_table (struct agp_bridge_data *bridge) +{ + struct _hp_private *hp = &hp_private; + int i; + + if (hp->io_pdir_owner) { + hp->io_pdir = (u64 *) __get_free_pages(GFP_KERNEL, + get_order(hp->io_pdir_size)); + if (!hp->io_pdir) { + printk(KERN_ERR PFX "Couldn't allocate contiguous " + "memory for I/O PDIR\n"); + hp->gatt = NULL; + hp->gatt_entries = 0; + return -ENOMEM; + } + memset(hp->io_pdir, 0, hp->io_pdir_size); + + hp->gatt = &hp->io_pdir[HP_ZX1_IOVA_TO_PDIR(hp->gart_base)]; + } + + for (i = 0; i < hp->gatt_entries; i++) { + hp->gatt[i] = (unsigned long) agp_bridge->scratch_page; + } + + return 0; +} + +static int +hp_zx1_free_gatt_table (struct agp_bridge_data *bridge) +{ + struct _hp_private *hp = &hp_private; + + if (hp->io_pdir_owner) + free_pages((unsigned long) hp->io_pdir, + get_order(hp->io_pdir_size)); + else + hp->gatt[0] = HP_ZX1_SBA_IOMMU_COOKIE; + return 0; +} + +static int +hp_zx1_insert_memory (struct agp_memory *mem, off_t pg_start, int type) +{ + struct _hp_private *hp = &hp_private; + int i, k; + off_t j, io_pg_start; + int io_pg_count; + + if (type != mem->type || + agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) { + return -EINVAL; + } + + io_pg_start = hp->io_pages_per_kpage * pg_start; + io_pg_count = hp->io_pages_per_kpage * mem->page_count; + if ((io_pg_start + io_pg_count) > hp->gatt_entries) { + return -EINVAL; + } + + j = io_pg_start; + while (j < (io_pg_start + io_pg_count)) { + if (hp->gatt[j]) { + return -EBUSY; + } + j++; + } + + if (!mem->is_flushed) { + global_cache_flush(); + mem->is_flushed = true; + } + + for (i = 0, j = io_pg_start; i < mem->page_count; i++) { + unsigned long paddr; + + paddr = page_to_phys(mem->pages[i]); + for (k = 0; + k < hp->io_pages_per_kpage; + k++, j++, paddr += hp->io_page_size) { + hp->gatt[j] = HP_ZX1_PDIR_VALID_BIT | paddr; + } + } + + agp_bridge->driver->tlb_flush(mem); + return 0; +} + +static int +hp_zx1_remove_memory (struct agp_memory *mem, off_t pg_start, int type) +{ + struct _hp_private *hp = &hp_private; + int i, io_pg_start, io_pg_count; + + if (type != mem->type || + agp_bridge->driver->agp_type_to_mask_type(agp_bridge, type)) { + return -EINVAL; + } + + io_pg_start = hp->io_pages_per_kpage * pg_start; + io_pg_count = hp->io_pages_per_kpage * mem->page_count; + for (i = io_pg_start; i < io_pg_count + io_pg_start; i++) { + hp->gatt[i] = agp_bridge->scratch_page; + } + + agp_bridge->driver->tlb_flush(mem); + return 0; +} + +static unsigned long +hp_zx1_mask_memory (struct agp_bridge_data *bridge, dma_addr_t addr, int type) +{ + return HP_ZX1_PDIR_VALID_BIT | addr; +} + +static void +hp_zx1_enable (struct agp_bridge_data *bridge, u32 mode) +{ + struct _hp_private *hp = &hp_private; + u32 command; + + command = readl(hp->lba_regs+hp->lba_cap_offset+PCI_AGP_STATUS); + command = agp_collect_device_status(bridge, mode, command); + command |= 0x00000100; + + writel(command, hp->lba_regs+hp->lba_cap_offset+PCI_AGP_COMMAND); + + agp_device_command(command, (mode & AGP8X_MODE) != 0); +} + +const struct agp_bridge_driver hp_zx1_driver = { + .owner = THIS_MODULE, + .size_type = FIXED_APER_SIZE, + .configure = hp_zx1_configure, + .fetch_size = hp_zx1_fetch_size, + .cleanup = hp_zx1_cleanup, + .tlb_flush = hp_zx1_tlbflush, + .mask_memory = hp_zx1_mask_memory, + .masks = hp_zx1_masks, + .agp_enable = hp_zx1_enable, + .cache_flush = global_cache_flush, + .create_gatt_table = hp_zx1_create_gatt_table, + .free_gatt_table = hp_zx1_free_gatt_table, + .insert_memory = hp_zx1_insert_memory, + .remove_memory = hp_zx1_remove_memory, + .alloc_by_type = agp_generic_alloc_by_type, + .free_by_type = agp_generic_free_by_type, + .agp_alloc_page = agp_generic_alloc_page, + .agp_alloc_pages = agp_generic_alloc_pages, + .agp_destroy_page = agp_generic_destroy_page, + .agp_destroy_pages = agp_generic_destroy_pages, + .agp_type_to_mask_type = agp_generic_type_to_mask_type, + .cant_use_aperture = true, +}; + +static int __init +hp_zx1_setup (u64 ioc_hpa, u64 lba_hpa) +{ + struct agp_bridge_data *bridge; + int error = 0; + + error = hp_zx1_ioc_init(ioc_hpa); + if (error) + goto fail; + + error = hp_zx1_lba_init(lba_hpa); + if (error) + goto fail; + + bridge = agp_alloc_bridge(); + if (!bridge) { + error = -ENOMEM; + goto fail; + } + bridge->driver = &hp_zx1_driver; + + fake_bridge_dev.vendor = PCI_VENDOR_ID_HP; + fake_bridge_dev.device = PCI_DEVICE_ID_HP_PCIX_LBA; + bridge->dev = &fake_bridge_dev; + + error = agp_add_bridge(bridge); + fail: + if (error) + hp_zx1_cleanup(); + return error; +} + +static acpi_status __init +zx1_gart_probe (acpi_handle obj, u32 depth, void *context, void **ret) +{ + acpi_handle handle, parent; + acpi_status status; + struct acpi_device_info *info; + u64 lba_hpa, sba_hpa, length; + int match; + + status = hp_acpi_csr_space(obj, &lba_hpa, &length); + if (ACPI_FAILURE(status)) + return AE_OK; /* keep looking for another bridge */ + + /* Look for an enclosing IOC scope and find its CSR space */ + handle = obj; + do { + status = acpi_get_object_info(handle, &info); + if (ACPI_SUCCESS(status) && (info->valid & ACPI_VALID_HID)) { + /* TBD check _CID also */ + match = (strcmp(info->hardware_id.string, "HWP0001") == 0); + kfree(info); + if (match) { + status = hp_acpi_csr_space(handle, &sba_hpa, &length); + if (ACPI_SUCCESS(status)) + break; + else { + printk(KERN_ERR PFX "Detected HP ZX1 " + "AGP LBA but no IOC.\n"); + return AE_OK; + } + } + } + + status = acpi_get_parent(handle, &parent); + handle = parent; + } while (ACPI_SUCCESS(status)); + + if (ACPI_FAILURE(status)) + return AE_OK; /* found no enclosing IOC */ + + if (hp_zx1_setup(sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa)) + return AE_OK; + + printk(KERN_INFO PFX "Detected HP ZX1 %s AGP chipset " + "(ioc=%llx, lba=%llx)\n", (char *)context, + sba_hpa + HP_ZX1_IOC_OFFSET, lba_hpa); + + hp_zx1_gart_found = 1; + return AE_CTRL_TERMINATE; /* we only support one bridge; quit looking */ +} + +static int __init +agp_hp_init (void) +{ + if (agp_off) + return -EINVAL; + + acpi_get_devices("HWP0003", zx1_gart_probe, "HWP0003", NULL); + if (hp_zx1_gart_found) + return 0; + + acpi_get_devices("HWP0007", zx1_gart_probe, "HWP0007", NULL); + if (hp_zx1_gart_found) + return 0; + + return -ENODEV; +} + +static void __exit +agp_hp_cleanup (void) +{ +} + +module_init(agp_hp_init); +module_exit(agp_hp_cleanup); + +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/char/agp/i460-agp.c b/drivers/char/agp/i460-agp.c new file mode 100644 index 000000000000..15b240ea4848 --- /dev/null +++ b/drivers/char/agp/i460-agp.c @@ -0,0 +1,659 @@ +/* + * For documentation on the i460 AGP interface, see Chapter 7 (AGP Subsystem) of + * the "Intel 460GTX Chipset Software Developer's Manual": + * http://www.intel.com/design/archives/itanium/downloads/248704.htm + */ +/* + * 460GX support by Chris Ahna + * Clean up & simplification by David Mosberger-Tang + */ +#include +#include +#include +#include +#include +#include +#include + +#include "agp.h" + +#define INTEL_I460_BAPBASE 0x98 +#define INTEL_I460_GXBCTL 0xa0 +#define INTEL_I460_AGPSIZ 0xa2 +#define INTEL_I460_ATTBASE 0xfe200000 +#define INTEL_I460_GATT_VALID (1UL << 24) +#define INTEL_I460_GATT_COHERENT (1UL << 25) + +/* + * The i460 can operate with large (4MB) pages, but there is no sane way to support this + * within the current kernel/DRM environment, so we disable the relevant code for now. + * See also comments in ia64_alloc_page()... + */ +#define I460_LARGE_IO_PAGES 0 + +#if I460_LARGE_IO_PAGES +# define I460_IO_PAGE_SHIFT i460.io_page_shift +#else +# define I460_IO_PAGE_SHIFT 12 +#endif + +#define I460_IOPAGES_PER_KPAGE (PAGE_SIZE >> I460_IO_PAGE_SHIFT) +#define I460_KPAGES_PER_IOPAGE (1 << (I460_IO_PAGE_SHIFT - PAGE_SHIFT)) +#define I460_SRAM_IO_DISABLE (1 << 4) +#define I460_BAPBASE_ENABLE (1 << 3) +#define I460_AGPSIZ_MASK 0x7 +#define I460_4M_PS (1 << 1) + +/* Control bits for Out-Of-GART coherency and Burst Write Combining */ +#define I460_GXBCTL_OOG (1UL << 0) +#define I460_GXBCTL_BWC (1UL << 2) + +/* + * gatt_table entries are 32-bits wide on the i460; the generic code ought to declare the + * gatt_table and gatt_table_real pointers a "void *"... + */ +#define RD_GATT(index) readl((u32 *) i460.gatt + (index)) +#define WR_GATT(index, val) writel((val), (u32 *) i460.gatt + (index)) +/* + * The 460 spec says we have to read the last location written to make sure that all + * writes have taken effect + */ +#define WR_FLUSH_GATT(index) RD_GATT(index) + +static unsigned long i460_mask_memory (struct agp_bridge_data *bridge, + dma_addr_t addr, int type); + +static struct { + void *gatt; /* ioremap'd GATT area */ + + /* i460 supports multiple GART page sizes, so GART pageshift is dynamic: */ + u8 io_page_shift; + + /* BIOS configures chipset to one of 2 possible apbase values: */ + u8 dynamic_apbase; + + /* structure for tracking partial use of 4MB GART pages: */ + struct lp_desc { + unsigned long *alloced_map; /* bitmap of kernel-pages in use */ + int refcount; /* number of kernel pages using the large page */ + u64 paddr; /* physical address of large page */ + struct page *page; /* page pointer */ + } *lp_desc; +} i460; + +static const struct aper_size_info_8 i460_sizes[3] = +{ + /* + * The 32GB aperture is only available with a 4M GART page size. Due to the + * dynamic GART page size, we can't figure out page_order or num_entries until + * runtime. + */ + {32768, 0, 0, 4}, + {1024, 0, 0, 2}, + {256, 0, 0, 1} +}; + +static struct gatt_mask i460_masks[] = +{ + { + .mask = INTEL_I460_GATT_VALID | INTEL_I460_GATT_COHERENT, + .type = 0 + } +}; + +static int i460_fetch_size (void) +{ + int i; + u8 temp; + struct aper_size_info_8 *values; + + /* Determine the GART page size */ + pci_read_config_byte(agp_bridge->dev, INTEL_I460_GXBCTL, &temp); + i460.io_page_shift = (temp & I460_4M_PS) ? 22 : 12; + pr_debug("i460_fetch_size: io_page_shift=%d\n", i460.io_page_shift); + + if (i460.io_page_shift != I460_IO_PAGE_SHIFT) { + printk(KERN_ERR PFX + "I/O (GART) page-size %luKB doesn't match expected " + "size %luKB\n", + 1UL << (i460.io_page_shift - 10), + 1UL << (I460_IO_PAGE_SHIFT)); + return 0; + } + + values = A_SIZE_8(agp_bridge->driver->aperture_sizes); + + pci_read_config_byte(agp_bridge->dev, INTEL_I460_AGPSIZ, &temp); + + /* Exit now if the IO drivers for the GART SRAMS are turned off */ + if (temp & I460_SRAM_IO_DISABLE) { + printk(KERN_ERR PFX "GART SRAMS disabled on 460GX chipset\n"); + printk(KERN_ERR PFX "AGPGART operation not possible\n"); + return 0; + } + + /* Make sure we don't try to create an 2 ^ 23 entry GATT */ + if ((i460.io_page_shift == 0) && ((temp & I460_AGPSIZ_MASK) == 4)) { + printk(KERN_ERR PFX "We can't have a 32GB aperture with 4KB GART pages\n"); + return 0; + } + + /* Determine the proper APBASE register */ + if (temp & I460_BAPBASE_ENABLE) + i460.dynamic_apbase = INTEL_I460_BAPBASE; + else + i460.dynamic_apbase = AGP_APBASE; + + for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) { + /* + * Dynamically calculate the proper num_entries and page_order values for + * the define aperture sizes. Take care not to shift off the end of + * values[i].size. + */ + values[i].num_entries = (values[i].size << 8) >> (I460_IO_PAGE_SHIFT - 12); + values[i].page_order = ilog2((sizeof(u32)*values[i].num_entries) >> PAGE_SHIFT); + } + + for (i = 0; i < agp_bridge->driver->num_aperture_sizes; i++) { + /* Neglect control bits when matching up size_value */ + if ((temp & I460_AGPSIZ_MASK) == values[i].size_value) { + agp_bridge->previous_size = agp_bridge->current_size = (void *) (values + i); + agp_bridge->aperture_size_idx = i; + return values[i].size; + } + } + + return 0; +} + +/* There isn't anything to do here since 460 has no GART TLB. */ +static void i460_tlb_flush (struct agp_memory *mem) +{ + return; +} + +/* + * This utility function is needed to prevent corruption of the control bits + * which are stored along with the aperture size in 460's AGPSIZ register + */ +static void i460_write_agpsiz (u8 size_value) +{ + u8 temp; + + pci_read_config_byte(agp_bridge->dev, INTEL_I460_AGPSIZ, &temp); + pci_write_config_byte(agp_bridge->dev, INTEL_I460_AGPSIZ, + ((temp & ~I460_AGPSIZ_MASK) | size_value)); +} + +static void i460_cleanup (void) +{ + struct aper_size_info_8 *previous_size; + + previous_size = A_SIZE_8(agp_bridge->previous_size); + i460_write_agpsiz(previous_size->size_value); + + if (I460_IO_PAGE_SHIFT > PAGE_SHIFT) + kfree(i460.lp_desc); +} + +static int i460_configure (void) +{ + union { + u32 small[2]; + u64 large; + } temp; + size_t size; + u8 scratch; + struct aper_size_info_8 *current_size; + + temp.large = 0; + + current_size = A_SIZE_8(agp_bridge->current_size); + i460_write_agpsiz(current_size->size_value); + + /* + * Do the necessary rigmarole to read all eight bytes of APBASE. + * This has to be done since the AGP aperture can be above 4GB on + * 460 based systems. + */ + pci_read_config_dword(agp_bridge->dev, i460.dynamic_apbase, &(temp.small[0])); + pci_read_config_dword(agp_bridge->dev, i460.dynamic_apbase + 4, &(temp.small[1])); + + /* Clear BAR control bits */ + agp_bridge->gart_bus_addr = temp.large & ~((1UL << 3) - 1); + + pci_read_config_byte(agp_bridge->dev, INTEL_I460_GXBCTL, &scratch); + pci_write_config_byte(agp_bridge->dev, INTEL_I460_GXBCTL, + (scratch & 0x02) | I460_GXBCTL_OOG | I460_GXBCTL_BWC); + + /* + * Initialize partial allocation trackers if a GART page is bigger than a kernel + * page. + */ + if (I460_IO_PAGE_SHIFT > PAGE_SHIFT) { + size = current_size->num_entries * sizeof(i460.lp_desc[0]); + i460.lp_desc = kzalloc(size, GFP_KERNEL); + if (!i460.lp_desc) + return -ENOMEM; + } + return 0; +} + +static int i460_create_gatt_table (struct agp_bridge_data *bridge) +{ + int page_order, num_entries, i; + void *temp; + + /* + * Load up the fixed address of the GART SRAMS which hold our GATT table. + */ + temp = agp_bridge->current_size; + page_order = A_SIZE_8(temp)->page_order; + num_entries = A_SIZE_8(temp)->num_entries; + + i460.gatt = ioremap(INTEL_I460_ATTBASE, PAGE_SIZE << page_order); + if (!i460.gatt) { + printk(KERN_ERR PFX "ioremap failed\n"); + return -ENOMEM; + } + + /* These are no good, the should be removed from the agp_bridge strucure... */ + agp_bridge->gatt_table_real = NULL; + agp_bridge->gatt_table = NULL; + agp_bridge->gatt_bus_addr = 0; + + for (i = 0; i < num_entries; ++i) + WR_GATT(i, 0); + WR_FLUSH_GATT(i - 1); + return 0; +} + +static int i460_free_gatt_table (struct agp_bridge_data *bridge) +{ + int num_entries, i; + void *temp; + + temp = agp_bridge->current_size; + + num_entries = A_SIZE_8(temp)->num_entries; + + for (i = 0; i < num_entries; ++i) + WR_GATT(i, 0); + WR_FLUSH_GATT(num_entries - 1); + + iounmap(i460.gatt); + return 0; +} + +/* + * The following functions are called when the I/O (GART) page size is smaller than + * PAGE_SIZE. + */ + +static int i460_insert_memory_small_io_page (struct agp_memory *mem, + off_t pg_start, int type) +{ + unsigned long paddr, io_pg_start, io_page_size; + int i, j, k, num_entries; + void *temp; + + pr_debug("i460_insert_memory_small_io_page(mem=%p, pg_start=%ld, type=%d, paddr0=0x%lx)\n", + mem, pg_start, type, page_to_phys(mem->pages[0])); + + if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) + return -EINVAL; + + io_pg_start = I460_IOPAGES_PER_KPAGE * pg_start; + + temp = agp_bridge->current_size; + num_entries = A_SIZE_8(temp)->num_entries; + + if ((io_pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count) > num_entries) { + printk(KERN_ERR PFX "Looks like we're out of AGP memory\n"); + return -EINVAL; + } + + j = io_pg_start; + while (j < (io_pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count)) { + if (!PGE_EMPTY(agp_bridge, RD_GATT(j))) { + pr_debug("i460_insert_memory_small_io_page: GATT[%d]=0x%x is busy\n", + j, RD_GATT(j)); + return -EBUSY; + } + j++; + } + + io_page_size = 1UL << I460_IO_PAGE_SHIFT; + for (i = 0, j = io_pg_start; i < mem->page_count; i++) { + paddr = page_to_phys(mem->pages[i]); + for (k = 0; k < I460_IOPAGES_PER_KPAGE; k++, j++, paddr += io_page_size) + WR_GATT(j, i460_mask_memory(agp_bridge, paddr, mem->type)); + } + WR_FLUSH_GATT(j - 1); + return 0; +} + +static int i460_remove_memory_small_io_page(struct agp_memory *mem, + off_t pg_start, int type) +{ + int i; + + pr_debug("i460_remove_memory_small_io_page(mem=%p, pg_start=%ld, type=%d)\n", + mem, pg_start, type); + + pg_start = I460_IOPAGES_PER_KPAGE * pg_start; + + for (i = pg_start; i < (pg_start + I460_IOPAGES_PER_KPAGE * mem->page_count); i++) + WR_GATT(i, 0); + WR_FLUSH_GATT(i - 1); + return 0; +} + +#if I460_LARGE_IO_PAGES + +/* + * These functions are called when the I/O (GART) page size exceeds PAGE_SIZE. + * + * This situation is interesting since AGP memory allocations that are smaller than a + * single GART page are possible. The i460.lp_desc array tracks partial allocation of the + * large GART pages to work around this issue. + * + * i460.lp_desc[pg_num].refcount tracks the number of kernel pages in use within GART page + * pg_num. i460.lp_desc[pg_num].paddr is the physical address of the large page and + * i460.lp_desc[pg_num].alloced_map is a bitmap of kernel pages that are in use (allocated). + */ + +static int i460_alloc_large_page (struct lp_desc *lp) +{ + unsigned long order = I460_IO_PAGE_SHIFT - PAGE_SHIFT; + size_t map_size; + + lp->page = alloc_pages(GFP_KERNEL, order); + if (!lp->page) { + printk(KERN_ERR PFX "Couldn't alloc 4M GART page...\n"); + return -ENOMEM; + } + + map_size = ((I460_KPAGES_PER_IOPAGE + BITS_PER_LONG - 1) & -BITS_PER_LONG)/8; + lp->alloced_map = kzalloc(map_size, GFP_KERNEL); + if (!lp->alloced_map) { + __free_pages(lp->page, order); + printk(KERN_ERR PFX "Out of memory, we're in trouble...\n"); + return -ENOMEM; + } + + lp->paddr = page_to_phys(lp->page); + lp->refcount = 0; + atomic_add(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp); + return 0; +} + +static void i460_free_large_page (struct lp_desc *lp) +{ + kfree(lp->alloced_map); + lp->alloced_map = NULL; + + __free_pages(lp->page, I460_IO_PAGE_SHIFT - PAGE_SHIFT); + atomic_sub(I460_KPAGES_PER_IOPAGE, &agp_bridge->current_memory_agp); +} + +static int i460_insert_memory_large_io_page (struct agp_memory *mem, + off_t pg_start, int type) +{ + int i, start_offset, end_offset, idx, pg, num_entries; + struct lp_desc *start, *end, *lp; + void *temp; + + if (type >= AGP_USER_TYPES || mem->type >= AGP_USER_TYPES) + return -EINVAL; + + temp = agp_bridge->current_size; + num_entries = A_SIZE_8(temp)->num_entries; + + /* Figure out what pg_start means in terms of our large GART pages */ + start = &i460.lp_desc[pg_start / I460_KPAGES_PER_IOPAGE]; + end = &i460.lp_desc[(pg_start + mem->page_count - 1) / I460_KPAGES_PER_IOPAGE]; + start_offset = pg_start % I460_KPAGES_PER_IOPAGE; + end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_IOPAGE; + + if (end > i460.lp_desc + num_entries) { + printk(KERN_ERR PFX "Looks like we're out of AGP memory\n"); + return -EINVAL; + } + + /* Check if the requested region of the aperture is free */ + for (lp = start; lp <= end; ++lp) { + if (!lp->alloced_map) + continue; /* OK, the entire large page is available... */ + + for (idx = ((lp == start) ? start_offset : 0); + idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); + idx++) + { + if (test_bit(idx, lp->alloced_map)) + return -EBUSY; + } + } + + for (lp = start, i = 0; lp <= end; ++lp) { + if (!lp->alloced_map) { + /* Allocate new GART pages... */ + if (i460_alloc_large_page(lp) < 0) + return -ENOMEM; + pg = lp - i460.lp_desc; + WR_GATT(pg, i460_mask_memory(agp_bridge, + lp->paddr, 0)); + WR_FLUSH_GATT(pg); + } + + for (idx = ((lp == start) ? start_offset : 0); + idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); + idx++, i++) + { + mem->pages[i] = lp->page; + __set_bit(idx, lp->alloced_map); + ++lp->refcount; + } + } + return 0; +} + +static int i460_remove_memory_large_io_page (struct agp_memory *mem, + off_t pg_start, int type) +{ + int i, pg, start_offset, end_offset, idx, num_entries; + struct lp_desc *start, *end, *lp; + void *temp; + + temp = agp_bridge->current_size; + num_entries = A_SIZE_8(temp)->num_entries; + + /* Figure out what pg_start means in terms of our large GART pages */ + start = &i460.lp_desc[pg_start / I460_KPAGES_PER_IOPAGE]; + end = &i460.lp_desc[(pg_start + mem->page_count - 1) / I460_KPAGES_PER_IOPAGE]; + start_offset = pg_start % I460_KPAGES_PER_IOPAGE; + end_offset = (pg_start + mem->page_count - 1) % I460_KPAGES_PER_IOPAGE; + + for (i = 0, lp = start; lp <= end; ++lp) { + for (idx = ((lp == start) ? start_offset : 0); + idx < ((lp == end) ? (end_offset + 1) : I460_KPAGES_PER_IOPAGE); + idx++, i++) + { + mem->pages[i] = NULL; + __clear_bit(idx, lp->alloced_map); + --lp->refcount; + } + + /* Free GART pages if they are unused */ + if (lp->refcount == 0) { + pg = lp - i460.lp_desc; + WR_GATT(pg, 0); + WR_FLUSH_GATT(pg); + i460_free_large_page(lp); + } + } + return 0; +} + +/* Wrapper routines to call the approriate {small_io_page,large_io_page} function */ + +static int i460_insert_memory (struct agp_memory *mem, + off_t pg_start, int type) +{ + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) + return i460_insert_memory_small_io_page(mem, pg_start, type); + else + return i460_insert_memory_large_io_page(mem, pg_start, type); +} + +static int i460_remove_memory (struct agp_memory *mem, + off_t pg_start, int type) +{ + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) + return i460_remove_memory_small_io_page(mem, pg_start, type); + else + return i460_remove_memory_large_io_page(mem, pg_start, type); +} + +/* + * If the I/O (GART) page size is bigger than the kernel page size, we don't want to + * allocate memory until we know where it is to be bound in the aperture (a + * multi-kernel-page alloc might fit inside of an already allocated GART page). + * + * Let's just hope nobody counts on the allocated AGP memory being there before bind time + * (I don't think current drivers do)... + */ +static struct page *i460_alloc_page (struct agp_bridge_data *bridge) +{ + void *page; + + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) { + page = agp_generic_alloc_page(agp_bridge); + } else + /* Returning NULL would cause problems */ + /* AK: really dubious code. */ + page = (void *)~0UL; + return page; +} + +static void i460_destroy_page (struct page *page, int flags) +{ + if (I460_IO_PAGE_SHIFT <= PAGE_SHIFT) { + agp_generic_destroy_page(page, flags); + } +} + +#endif /* I460_LARGE_IO_PAGES */ + +static unsigned long i460_mask_memory (struct agp_bridge_data *bridge, + dma_addr_t addr, int type) +{ + /* Make sure the returned address is a valid GATT entry */ + return bridge->driver->masks[0].mask + | (((addr & ~((1 << I460_IO_PAGE_SHIFT) - 1)) & 0xfffff000) >> 12); +} + +const struct agp_bridge_driver intel_i460_driver = { + .owner = THIS_MODULE, + .aperture_sizes = i460_sizes, + .size_type = U8_APER_SIZE, + .num_aperture_sizes = 3, + .configure = i460_configure, + .fetch_size = i460_fetch_size, + .cleanup = i460_cleanup, + .tlb_flush = i460_tlb_flush, + .mask_memory = i460_mask_memory, + .masks = i460_masks, + .agp_enable = agp_generic_enable, + .cache_flush = global_cache_flush, + .create_gatt_table = i460_create_gatt_table, + .free_gatt_table = i460_free_gatt_table, +#if I460_LARGE_IO_PAGES + .insert_memory = i460_insert_memory, + .remove_memory = i460_remove_memory, + .agp_alloc_page = i460_alloc_page, + .agp_destroy_page = i460_destroy_page, +#else + .insert_memory = i460_insert_memory_small_io_page, + .remove_memory = i460_remove_memory_small_io_page, + .agp_alloc_page = agp_generic_alloc_page, + .agp_alloc_pages = agp_generic_alloc_pages, + .agp_destroy_page = agp_generic_destroy_page, + .agp_destroy_pages = agp_generic_destroy_pages, +#endif + .alloc_by_type = agp_generic_alloc_by_type, + .free_by_type = agp_generic_free_by_type, + .agp_type_to_mask_type = agp_generic_type_to_mask_type, + .cant_use_aperture = true, +}; + +static int agp_intel_i460_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct agp_bridge_data *bridge; + u8 cap_ptr; + + cap_ptr = pci_find_capability(pdev, PCI_CAP_ID_AGP); + if (!cap_ptr) + return -ENODEV; + + bridge = agp_alloc_bridge(); + if (!bridge) + return -ENOMEM; + + bridge->driver = &intel_i460_driver; + bridge->dev = pdev; + bridge->capndx = cap_ptr; + + printk(KERN_INFO PFX "Detected Intel 460GX chipset\n"); + + pci_set_drvdata(pdev, bridge); + return agp_add_bridge(bridge); +} + +static void agp_intel_i460_remove(struct pci_dev *pdev) +{ + struct agp_bridge_data *bridge = pci_get_drvdata(pdev); + + agp_remove_bridge(bridge); + agp_put_bridge(bridge); +} + +static struct pci_device_id agp_intel_i460_pci_table[] = { + { + .class = (PCI_CLASS_BRIDGE_HOST << 8), + .class_mask = ~0, + .vendor = PCI_VENDOR_ID_INTEL, + .device = PCI_DEVICE_ID_INTEL_84460GX, + .subvendor = PCI_ANY_ID, + .subdevice = PCI_ANY_ID, + }, + { } +}; + +MODULE_DEVICE_TABLE(pci, agp_intel_i460_pci_table); + +static struct pci_driver agp_intel_i460_pci_driver = { + .name = "agpgart-intel-i460", + .id_table = agp_intel_i460_pci_table, + .probe = agp_intel_i460_probe, + .remove = agp_intel_i460_remove, +}; + +static int __init agp_intel_i460_init(void) +{ + if (agp_off) + return -EINVAL; + return pci_register_driver(&agp_intel_i460_pci_driver); +} + +static void __exit agp_intel_i460_cleanup(void) +{ + pci_unregister_driver(&agp_intel_i460_pci_driver); +} + +module_init(agp_intel_i460_init); +module_exit(agp_intel_i460_cleanup); + +MODULE_AUTHOR("Chris Ahna "); +MODULE_LICENSE("GPL and additional rights"); diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 9c90b1d2c036..f6acb2f78533 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -64,6 +64,25 @@ static DEFINE_MUTEX(hpet_mutex); /* replaces BKL */ static u32 hpet_nhpet, hpet_max_freq = HPET_USER_FREQ; +/* This clocksource driver currently only works on ia64 */ +#ifdef CONFIG_IA64 +static void __iomem *hpet_mctr; + +static u64 read_hpet(struct clocksource *cs) +{ + return (u64)read_counter((void __iomem *)hpet_mctr); +} + +static struct clocksource clocksource_hpet = { + .name = "hpet", + .rating = 250, + .read = read_hpet, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; +static struct clocksource *hpet_clocksource; +#endif + /* A lock for concurrent access by app and isr hpet activity. */ static DEFINE_SPINLOCK(hpet_lock); @@ -887,6 +906,17 @@ int hpet_alloc(struct hpet_data *hdp) hpetp->hp_delta = hpet_calibrate(hpetp); +/* This clocksource driver currently only works on ia64 */ +#ifdef CONFIG_IA64 + if (!hpet_clocksource) { + hpet_mctr = (void __iomem *)&hpetp->hp_hpet->hpet_mc; + clocksource_hpet.archdata.fsys_mmio = hpet_mctr; + clocksource_register_hz(&clocksource_hpet, hpetp->hp_tick_freq); + hpetp->hp_clocksource = &clocksource_hpet; + hpet_clocksource = &clocksource_hpet; + } +#endif + return 0; } diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig index 442c40efb200..8de74dcfa18c 100644 --- a/drivers/char/hw_random/Kconfig +++ b/drivers/char/hw_random/Kconfig @@ -37,7 +37,7 @@ config HW_RANDOM_TIMERIOMEM config HW_RANDOM_INTEL tristate "Intel HW Random Number Generator support" - depends on (X86 || COMPILE_TEST) && PCI + depends on (X86 || IA64 || COMPILE_TEST) && PCI default HW_RANDOM help This driver provides kernel-side support for the Random Number diff --git a/drivers/char/mem.c b/drivers/char/mem.c index 3c6670cf905f..263c19cd6fb9 100644 --- a/drivers/char/mem.c +++ b/drivers/char/mem.c @@ -31,6 +31,10 @@ #include #include +#ifdef CONFIG_IA64 +# include +#endif + #define DEVMEM_MINOR 1 #define DEVPORT_MINOR 4 @@ -273,6 +277,13 @@ int __weak phys_mem_access_prot_allowed(struct file *file, #ifdef pgprot_noncached static int uncached_access(struct file *file, phys_addr_t addr) { +#if defined(CONFIG_IA64) + /* + * On ia64, we ignore O_DSYNC because we cannot tolerate memory + * attribute aliases. + */ + return !(efi_mem_attributes(addr) & EFI_MEMORY_WB); +#else /* * Accessing memory above the top the kernel knows about or through a * file pointer @@ -281,6 +292,7 @@ static int uncached_access(struct file *file, phys_addr_t addr) if (file->f_flags & O_DSYNC) return 1; return addr >= __pa(high_memory); +#endif } #endif diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c new file mode 100644 index 000000000000..b35f651837c8 --- /dev/null +++ b/drivers/char/mspec.c @@ -0,0 +1,295 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2001-2006 Silicon Graphics, Inc. All rights + * reserved. + */ + +/* + * SN Platform Special Memory (mspec) Support + * + * This driver exports the SN special memory (mspec) facility to user + * processes. + * There are two types of memory made available thru this driver: + * uncached and cached. + * + * Uncached are used for memory write combining feature of the ia64 + * cpu. + * + * Cached are used for areas of memory that are used as cached addresses + * on our partition and used as uncached addresses from other partitions. + * Due to a design constraint of the SN2 Shub, you can not have processors + * on the same FSB perform both a cached and uncached reference to the + * same cache line. These special memory cached regions prevent the + * kernel from ever dropping in a TLB entry and therefore prevent the + * processor from ever speculating a cache line from this page. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define CACHED_ID "Cached," +#define UNCACHED_ID "Uncached" +#define REVISION "4.0" +#define MSPEC_BASENAME "mspec" + +/* + * Page types allocated by the device. + */ +enum mspec_page_type { + MSPEC_CACHED = 2, + MSPEC_UNCACHED +}; + +/* + * One of these structures is allocated when an mspec region is mmaped. The + * structure is pointed to by the vma->vm_private_data field in the vma struct. + * This structure is used to record the addresses of the mspec pages. + * This structure is shared by all vma's that are split off from the + * original vma when split_vma()'s are done. + * + * The refcnt is incremented atomically because mm->mmap_lock does not + * protect in fork case where multiple tasks share the vma_data. + */ +struct vma_data { + refcount_t refcnt; /* Number of vmas sharing the data. */ + spinlock_t lock; /* Serialize access to this structure. */ + int count; /* Number of pages allocated. */ + enum mspec_page_type type; /* Type of pages allocated. */ + unsigned long vm_start; /* Original (unsplit) base. */ + unsigned long vm_end; /* Original (unsplit) end. */ + unsigned long maddr[]; /* Array of MSPEC addresses. */ +}; + +/* + * mspec_open + * + * Called when a device mapping is created by a means other than mmap + * (via fork, munmap, etc.). Increments the reference count on the + * underlying mspec data so it is not freed prematurely. + */ +static void +mspec_open(struct vm_area_struct *vma) +{ + struct vma_data *vdata; + + vdata = vma->vm_private_data; + refcount_inc(&vdata->refcnt); +} + +/* + * mspec_close + * + * Called when unmapping a device mapping. Frees all mspec pages + * belonging to all the vma's sharing this vma_data structure. + */ +static void +mspec_close(struct vm_area_struct *vma) +{ + struct vma_data *vdata; + int index, last_index; + unsigned long my_page; + + vdata = vma->vm_private_data; + + if (!refcount_dec_and_test(&vdata->refcnt)) + return; + + last_index = (vdata->vm_end - vdata->vm_start) >> PAGE_SHIFT; + for (index = 0; index < last_index; index++) { + if (vdata->maddr[index] == 0) + continue; + /* + * Clear the page before sticking it back + * into the pool. + */ + my_page = vdata->maddr[index]; + vdata->maddr[index] = 0; + memset((char *)my_page, 0, PAGE_SIZE); + uncached_free_page(my_page, 1); + } + + kvfree(vdata); +} + +/* + * mspec_fault + * + * Creates a mspec page and maps it to user space. + */ +static vm_fault_t +mspec_fault(struct vm_fault *vmf) +{ + unsigned long paddr, maddr; + unsigned long pfn; + pgoff_t index = vmf->pgoff; + struct vma_data *vdata = vmf->vma->vm_private_data; + + maddr = (volatile unsigned long) vdata->maddr[index]; + if (maddr == 0) { + maddr = uncached_alloc_page(numa_node_id(), 1); + if (maddr == 0) + return VM_FAULT_OOM; + + spin_lock(&vdata->lock); + if (vdata->maddr[index] == 0) { + vdata->count++; + vdata->maddr[index] = maddr; + } else { + uncached_free_page(maddr, 1); + maddr = vdata->maddr[index]; + } + spin_unlock(&vdata->lock); + } + + paddr = maddr & ~__IA64_UNCACHED_OFFSET; + pfn = paddr >> PAGE_SHIFT; + + return vmf_insert_pfn(vmf->vma, vmf->address, pfn); +} + +static const struct vm_operations_struct mspec_vm_ops = { + .open = mspec_open, + .close = mspec_close, + .fault = mspec_fault, +}; + +/* + * mspec_mmap + * + * Called when mmapping the device. Initializes the vma with a fault handler + * and private data structure necessary to allocate, track, and free the + * underlying pages. + */ +static int +mspec_mmap(struct file *file, struct vm_area_struct *vma, + enum mspec_page_type type) +{ + struct vma_data *vdata; + int pages, vdata_size; + + if (vma->vm_pgoff != 0) + return -EINVAL; + + if ((vma->vm_flags & VM_SHARED) == 0) + return -EINVAL; + + if ((vma->vm_flags & VM_WRITE) == 0) + return -EPERM; + + pages = vma_pages(vma); + vdata_size = sizeof(struct vma_data) + pages * sizeof(long); + vdata = kvzalloc(vdata_size, GFP_KERNEL); + if (!vdata) + return -ENOMEM; + + vdata->vm_start = vma->vm_start; + vdata->vm_end = vma->vm_end; + vdata->type = type; + spin_lock_init(&vdata->lock); + refcount_set(&vdata->refcnt, 1); + vma->vm_private_data = vdata; + + vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP); + if (vdata->type == MSPEC_UNCACHED) + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &mspec_vm_ops; + + return 0; +} + +static int +cached_mmap(struct file *file, struct vm_area_struct *vma) +{ + return mspec_mmap(file, vma, MSPEC_CACHED); +} + +static int +uncached_mmap(struct file *file, struct vm_area_struct *vma) +{ + return mspec_mmap(file, vma, MSPEC_UNCACHED); +} + +static const struct file_operations cached_fops = { + .owner = THIS_MODULE, + .mmap = cached_mmap, + .llseek = noop_llseek, +}; + +static struct miscdevice cached_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mspec_cached", + .fops = &cached_fops +}; + +static const struct file_operations uncached_fops = { + .owner = THIS_MODULE, + .mmap = uncached_mmap, + .llseek = noop_llseek, +}; + +static struct miscdevice uncached_miscdev = { + .minor = MISC_DYNAMIC_MINOR, + .name = "mspec_uncached", + .fops = &uncached_fops +}; + +/* + * mspec_init + * + * Called at boot time to initialize the mspec facility. + */ +static int __init +mspec_init(void) +{ + int ret; + + ret = misc_register(&cached_miscdev); + if (ret) { + printk(KERN_ERR "%s: failed to register device %i\n", + CACHED_ID, ret); + return ret; + } + ret = misc_register(&uncached_miscdev); + if (ret) { + printk(KERN_ERR "%s: failed to register device %i\n", + UNCACHED_ID, ret); + misc_deregister(&cached_miscdev); + return ret; + } + + printk(KERN_INFO "%s %s initialized devices: %s %s\n", + MSPEC_BASENAME, REVISION, CACHED_ID, UNCACHED_ID); + + return 0; +} + +static void __exit +mspec_exit(void) +{ + misc_deregister(&uncached_miscdev); + misc_deregister(&cached_miscdev); +} + +module_init(mspec_init); +module_exit(mspec_exit); + +MODULE_AUTHOR("Silicon Graphics, Inc. "); +MODULE_DESCRIPTION("Driver for SGI SN special memory operations"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig index 35efb53d5492..f429b9b37b76 100644 --- a/drivers/cpufreq/Kconfig +++ b/drivers/cpufreq/Kconfig @@ -239,6 +239,17 @@ if PPC32 || PPC64 source "drivers/cpufreq/Kconfig.powerpc" endif +if IA64 +config IA64_ACPI_CPUFREQ + tristate "ACPI Processor P-States driver" + depends on ACPI_PROCESSOR + help + This driver adds a CPUFreq driver which utilizes the ACPI + Processor Performance States. + + If in doubt, say N. +endif + if MIPS config BMIPS_CPUFREQ tristate "BMIPS CPUfreq Driver" diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 8d141c71b016..ef8510774913 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -102,6 +102,7 @@ obj-$(CONFIG_POWERNV_CPUFREQ) += powernv-cpufreq.o ################################################################################## # Other platform drivers obj-$(CONFIG_BMIPS_CPUFREQ) += bmips-cpufreq.o +obj-$(CONFIG_IA64_ACPI_CPUFREQ) += ia64-acpi-cpufreq.o obj-$(CONFIG_LOONGSON2_CPUFREQ) += loongson2_cpufreq.o obj-$(CONFIG_SH_CPU_FREQ) += sh-cpufreq.o obj-$(CONFIG_SPARC_US2E_CPUFREQ) += sparc-us2e-cpufreq.o diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c new file mode 100644 index 000000000000..c6bdc455517f --- /dev/null +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -0,0 +1,353 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This file provides the ACPI based P-state support. This + * module works with generic cpufreq infrastructure. Most of + * the code is based on i386 version + * (arch/i386/kernel/cpu/cpufreq/acpi-cpufreq.c) + * + * Copyright (C) 2005 Intel Corp + * Venkatesh Pallipadi + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +MODULE_AUTHOR("Venkatesh Pallipadi"); +MODULE_DESCRIPTION("ACPI Processor P-States Driver"); +MODULE_LICENSE("GPL"); + +struct cpufreq_acpi_io { + struct acpi_processor_performance acpi_data; + unsigned int resume; +}; + +struct cpufreq_acpi_req { + unsigned int cpu; + unsigned int state; +}; + +static struct cpufreq_acpi_io *acpi_io_data[NR_CPUS]; + +static struct cpufreq_driver acpi_cpufreq_driver; + + +static int +processor_set_pstate ( + u32 value) +{ + s64 retval; + + pr_debug("processor_set_pstate\n"); + + retval = ia64_pal_set_pstate((u64)value); + + if (retval) { + pr_debug("Failed to set freq to 0x%x, with error 0x%llx\n", + value, retval); + return -ENODEV; + } + return (int)retval; +} + + +static int +processor_get_pstate ( + u32 *value) +{ + u64 pstate_index = 0; + s64 retval; + + pr_debug("processor_get_pstate\n"); + + retval = ia64_pal_get_pstate(&pstate_index, + PAL_GET_PSTATE_TYPE_INSTANT); + *value = (u32) pstate_index; + + if (retval) + pr_debug("Failed to get current freq with " + "error 0x%llx, idx 0x%x\n", retval, *value); + + return (int)retval; +} + + +/* To be used only after data->acpi_data is initialized */ +static unsigned +extract_clock ( + struct cpufreq_acpi_io *data, + unsigned value) +{ + unsigned long i; + + pr_debug("extract_clock\n"); + + for (i = 0; i < data->acpi_data.state_count; i++) { + if (value == data->acpi_data.states[i].status) + return data->acpi_data.states[i].core_frequency; + } + return data->acpi_data.states[i-1].core_frequency; +} + + +static long +processor_get_freq ( + void *arg) +{ + struct cpufreq_acpi_req *req = arg; + unsigned int cpu = req->cpu; + struct cpufreq_acpi_io *data = acpi_io_data[cpu]; + u32 value; + int ret; + + pr_debug("processor_get_freq\n"); + if (smp_processor_id() != cpu) + return -EAGAIN; + + /* processor_get_pstate gets the instantaneous frequency */ + ret = processor_get_pstate(&value); + if (ret) { + pr_warn("get performance failed with error %d\n", ret); + return ret; + } + return 1000 * extract_clock(data, value); +} + + +static long +processor_set_freq ( + void *arg) +{ + struct cpufreq_acpi_req *req = arg; + unsigned int cpu = req->cpu; + struct cpufreq_acpi_io *data = acpi_io_data[cpu]; + int ret, state = req->state; + u32 value; + + pr_debug("processor_set_freq\n"); + if (smp_processor_id() != cpu) + return -EAGAIN; + + if (state == data->acpi_data.state) { + if (unlikely(data->resume)) { + pr_debug("Called after resume, resetting to P%d\n", state); + data->resume = 0; + } else { + pr_debug("Already at target state (P%d)\n", state); + return 0; + } + } + + pr_debug("Transitioning from P%d to P%d\n", + data->acpi_data.state, state); + + /* + * First we write the target state's 'control' value to the + * control_register. + */ + value = (u32) data->acpi_data.states[state].control; + + pr_debug("Transitioning to state: 0x%08x\n", value); + + ret = processor_set_pstate(value); + if (ret) { + pr_warn("Transition failed with error %d\n", ret); + return -ENODEV; + } + + data->acpi_data.state = state; + return 0; +} + + +static unsigned int +acpi_cpufreq_get ( + unsigned int cpu) +{ + struct cpufreq_acpi_req req; + long ret; + + req.cpu = cpu; + ret = work_on_cpu(cpu, processor_get_freq, &req); + + return ret > 0 ? (unsigned int) ret : 0; +} + + +static int +acpi_cpufreq_target ( + struct cpufreq_policy *policy, + unsigned int index) +{ + struct cpufreq_acpi_req req; + + req.cpu = policy->cpu; + req.state = index; + + return work_on_cpu(req.cpu, processor_set_freq, &req); +} + +static int +acpi_cpufreq_cpu_init ( + struct cpufreq_policy *policy) +{ + unsigned int i; + unsigned int cpu = policy->cpu; + struct cpufreq_acpi_io *data; + unsigned int result = 0; + struct cpufreq_frequency_table *freq_table; + + pr_debug("acpi_cpufreq_cpu_init\n"); + + data = kzalloc(sizeof(*data), GFP_KERNEL); + if (!data) + return (-ENOMEM); + + acpi_io_data[cpu] = data; + + result = acpi_processor_register_performance(&data->acpi_data, cpu); + + if (result) + goto err_free; + + /* capability check */ + if (data->acpi_data.state_count <= 1) { + pr_debug("No P-States\n"); + result = -ENODEV; + goto err_unreg; + } + + if ((data->acpi_data.control_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE) || + (data->acpi_data.status_register.space_id != + ACPI_ADR_SPACE_FIXED_HARDWARE)) { + pr_debug("Unsupported address space [%d, %d]\n", + (u32) (data->acpi_data.control_register.space_id), + (u32) (data->acpi_data.status_register.space_id)); + result = -ENODEV; + goto err_unreg; + } + + /* alloc freq_table */ + freq_table = kcalloc(data->acpi_data.state_count + 1, + sizeof(*freq_table), + GFP_KERNEL); + if (!freq_table) { + result = -ENOMEM; + goto err_unreg; + } + + /* detect transition latency */ + policy->cpuinfo.transition_latency = 0; + for (i=0; iacpi_data.state_count; i++) { + if ((data->acpi_data.states[i].transition_latency * 1000) > + policy->cpuinfo.transition_latency) { + policy->cpuinfo.transition_latency = + data->acpi_data.states[i].transition_latency * 1000; + } + } + + /* table init */ + for (i = 0; i <= data->acpi_data.state_count; i++) + { + if (i < data->acpi_data.state_count) { + freq_table[i].frequency = + data->acpi_data.states[i].core_frequency * 1000; + } else { + freq_table[i].frequency = CPUFREQ_TABLE_END; + } + } + + policy->freq_table = freq_table; + + /* notify BIOS that we exist */ + acpi_processor_notify_smm(THIS_MODULE); + + pr_info("CPU%u - ACPI performance management activated\n", cpu); + + for (i = 0; i < data->acpi_data.state_count; i++) + pr_debug(" %cP%d: %d MHz, %d mW, %d uS, %d uS, 0x%x 0x%x\n", + (i == data->acpi_data.state?'*':' '), i, + (u32) data->acpi_data.states[i].core_frequency, + (u32) data->acpi_data.states[i].power, + (u32) data->acpi_data.states[i].transition_latency, + (u32) data->acpi_data.states[i].bus_master_latency, + (u32) data->acpi_data.states[i].status, + (u32) data->acpi_data.states[i].control); + + /* the first call to ->target() should result in us actually + * writing something to the appropriate registers. */ + data->resume = 1; + + return (result); + + err_unreg: + acpi_processor_unregister_performance(cpu); + err_free: + kfree(data); + acpi_io_data[cpu] = NULL; + + return (result); +} + + +static int +acpi_cpufreq_cpu_exit ( + struct cpufreq_policy *policy) +{ + struct cpufreq_acpi_io *data = acpi_io_data[policy->cpu]; + + pr_debug("acpi_cpufreq_cpu_exit\n"); + + if (data) { + acpi_io_data[policy->cpu] = NULL; + acpi_processor_unregister_performance(policy->cpu); + kfree(policy->freq_table); + kfree(data); + } + + return (0); +} + + +static struct cpufreq_driver acpi_cpufreq_driver = { + .verify = cpufreq_generic_frequency_table_verify, + .target_index = acpi_cpufreq_target, + .get = acpi_cpufreq_get, + .init = acpi_cpufreq_cpu_init, + .exit = acpi_cpufreq_cpu_exit, + .name = "acpi-cpufreq", + .attr = cpufreq_generic_attr, +}; + + +static int __init +acpi_cpufreq_init (void) +{ + pr_debug("acpi_cpufreq_init\n"); + + return cpufreq_register_driver(&acpi_cpufreq_driver); +} + + +static void __exit +acpi_cpufreq_exit (void) +{ + pr_debug("acpi_cpufreq_exit\n"); + + cpufreq_unregister_driver(&acpi_cpufreq_driver); +} + +late_initcall(acpi_cpufreq_init); +module_exit(acpi_cpufreq_exit); diff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index afd38539b92e..6d3f416e61f1 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig @@ -68,6 +68,30 @@ config FIRMWARE_MEMMAP See also Documentation/ABI/testing/sysfs-firmware-memmap. +config EFI_PCDP + bool "Console device selection via EFI PCDP or HCDP table" + depends on ACPI && EFI && IA64 + default y if IA64 + help + If your firmware supplies the PCDP table, and you want to + automatically use the primary console device it describes + as the Linux console, say Y here. + + If your firmware supplies the HCDP table, and you want to + use the first serial port it describes as the Linux console, + say Y here. If your EFI ConOut path contains only a UART + device, it will become the console automatically. Otherwise, + you must specify the "console=hcdp" kernel boot argument. + + Neither the PCDP nor the HCDP affects naming of serial devices, + so a serial console may be /dev/ttyS0, /dev/ttyS1, etc, depending + on how the driver discovers devices. + + You must also enable the appropriate drivers (serial, VGA, etc.) + + See DIG64_HCDPv20_042804.pdf available from + + config DMIID bool "Export DMI identification via sysfs to userspace" depends on DMI diff --git a/drivers/firmware/Makefile b/drivers/firmware/Makefile index 7a8d486e718f..e7d1272a5f0f 100644 --- a/drivers/firmware/Makefile +++ b/drivers/firmware/Makefile @@ -7,6 +7,7 @@ obj-$(CONFIG_ARM_SDE_INTERFACE) += arm_sdei.o obj-$(CONFIG_DMI) += dmi_scan.o obj-$(CONFIG_DMI_SYSFS) += dmi-sysfs.o obj-$(CONFIG_EDD) += edd.o +obj-$(CONFIG_EFI_PCDP) += pcdp.o obj-$(CONFIG_DMIID) += dmi-id.o obj-$(CONFIG_INTEL_STRATIX10_SERVICE) += stratix10-svc.o obj-$(CONFIG_INTEL_STRATIX10_RSU) += stratix10-rsu.o diff --git a/drivers/firmware/efi/Kconfig b/drivers/firmware/efi/Kconfig index 72f2537d90ca..4a77a28f9ca6 100644 --- a/drivers/firmware/efi/Kconfig +++ b/drivers/firmware/efi/Kconfig @@ -4,7 +4,7 @@ menu "EFI (Extensible Firmware Interface) Support" config EFI_ESRT bool - depends on EFI + depends on EFI && !IA64 default y config EFI_VARS_PSTORE @@ -123,7 +123,7 @@ config EFI_BOOTLOADER_CONTROL config EFI_CAPSULE_LOADER tristate "EFI capsule loader" - depends on EFI + depends on EFI && !IA64 help This option exposes a loader interface "/dev/efi_capsule_loader" for users to load EFI capsules. This driver requires working runtime @@ -224,7 +224,7 @@ config EFI_DISABLE_PCI_DMA config EFI_EARLYCON def_bool y - depends on SERIAL_EARLYCON && !ARM + depends on SERIAL_EARLYCON && !ARM && !IA64 select FONT_SUPPORT select ARCH_USE_MEMREMAP_PROT diff --git a/drivers/firmware/efi/efi.c b/drivers/firmware/efi/efi.c index 4fcda50acfa4..80146298fc66 100644 --- a/drivers/firmware/efi/efi.c +++ b/drivers/firmware/efi/efi.c @@ -148,7 +148,7 @@ static ssize_t systab_show(struct kobject *kobj, if (efi.smbios != EFI_INVALID_TABLE_ADDR) str += sprintf(str, "SMBIOS=0x%lx\n", efi.smbios); - if (IS_ENABLED(CONFIG_X86)) + if (IS_ENABLED(CONFIG_IA64) || IS_ENABLED(CONFIG_X86)) str = efi_systab_show_arch(str); return str - buf; @@ -825,6 +825,7 @@ int __init efi_systab_check_header(const efi_table_hdr_t *systab_hdr) return 0; } +#ifndef CONFIG_IA64 static const efi_char16_t *__init map_fw_vendor(unsigned long fw_vendor, size_t size) { @@ -840,6 +841,10 @@ static void __init unmap_fw_vendor(const void *fw_vendor, size_t size) { early_memunmap((void *)fw_vendor, size); } +#else +#define map_fw_vendor(p, s) __va(p) +#define unmap_fw_vendor(v, s) +#endif void __init efi_systab_report_header(const efi_table_hdr_t *systab_hdr, unsigned long fw_vendor) @@ -942,6 +947,11 @@ char * __init efi_md_typeattr_format(char *buf, size_t size, return buf; } +/* + * IA64 has a funky EFI memory map that doesn't work the same way as + * other architectures. + */ +#ifndef CONFIG_IA64 /* * efi_mem_attributes - lookup memmap attributes for physical address * @phys_addr: the physical address to lookup @@ -989,6 +999,7 @@ int efi_mem_type(unsigned long phys_addr) } return -EINVAL; } +#endif int efi_status_to_err(efi_status_t status) { diff --git a/drivers/firmware/pcdp.c b/drivers/firmware/pcdp.c new file mode 100644 index 000000000000..715a45442d1c --- /dev/null +++ b/drivers/firmware/pcdp.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Parse the EFI PCDP table to locate the console device. + * + * (c) Copyright 2002, 2003, 2004 Hewlett-Packard Development Company, L.P. + * Khalid Aziz + * Alex Williamson + * Bjorn Helgaas + */ + +#include +#include +#include +#include +#include +#include +#include "pcdp.h" + +static int __init +setup_serial_console(struct pcdp_uart *uart) +{ +#ifdef CONFIG_SERIAL_8250_CONSOLE + int mmio; + static char options[64], *p = options; + char parity; + + mmio = (uart->addr.space_id == ACPI_ADR_SPACE_SYSTEM_MEMORY); + p += sprintf(p, "uart8250,%s,0x%llx", + mmio ? "mmio" : "io", uart->addr.address); + if (uart->baud) { + p += sprintf(p, ",%llu", uart->baud); + if (uart->bits) { + switch (uart->parity) { + case 0x2: parity = 'e'; break; + case 0x3: parity = 'o'; break; + default: parity = 'n'; + } + p += sprintf(p, "%c%d", parity, uart->bits); + } + } + + add_preferred_console("uart", 8250, &options[9]); + return setup_earlycon(options); +#else + return -ENODEV; +#endif +} + +static int __init +setup_vga_console(struct pcdp_device *dev) +{ +#if defined(CONFIG_VT) && defined(CONFIG_VGA_CONSOLE) + u8 *if_ptr; + + if_ptr = ((u8 *)dev + sizeof(struct pcdp_device)); + if (if_ptr[0] == PCDP_IF_PCI) { + struct pcdp_if_pci if_pci; + + /* struct copy since ifptr might not be correctly aligned */ + + memcpy(&if_pci, if_ptr, sizeof(if_pci)); + + if (if_pci.trans & PCDP_PCI_TRANS_IOPORT) + vga_console_iobase = if_pci.ioport_tra; + + if (if_pci.trans & PCDP_PCI_TRANS_MMIO) + vga_console_membase = if_pci.mmio_tra; + } + + if (efi_mem_type(vga_console_membase + 0xA0000) == EFI_CONVENTIONAL_MEMORY) { + printk(KERN_ERR "PCDP: VGA selected, but frame buffer is not MMIO!\n"); + return -ENODEV; + } + + conswitchp = &vga_con; + printk(KERN_INFO "PCDP: VGA console\n"); + return 0; +#else + return -ENODEV; +#endif +} + +extern unsigned long hcdp_phys; + +int __init +efi_setup_pcdp_console(char *cmdline) +{ + struct pcdp *pcdp; + struct pcdp_uart *uart; + struct pcdp_device *dev, *end; + int i, serial = 0; + int rc = -ENODEV; + + if (hcdp_phys == EFI_INVALID_TABLE_ADDR) + return -ENODEV; + + pcdp = early_memremap(hcdp_phys, 4096); + printk(KERN_INFO "PCDP: v%d at 0x%lx\n", pcdp->rev, hcdp_phys); + + if (strstr(cmdline, "console=hcdp")) { + if (pcdp->rev < 3) + serial = 1; + } else if (strstr(cmdline, "console=")) { + printk(KERN_INFO "Explicit \"console=\"; ignoring PCDP\n"); + goto out; + } + + if (pcdp->rev < 3 && efi_uart_console_only()) + serial = 1; + + for (i = 0, uart = pcdp->uart; i < pcdp->num_uarts; i++, uart++) { + if (uart->flags & PCDP_UART_PRIMARY_CONSOLE || serial) { + if (uart->type == PCDP_CONSOLE_UART) { + rc = setup_serial_console(uart); + goto out; + } + } + } + + end = (struct pcdp_device *) ((u8 *) pcdp + pcdp->length); + for (dev = (struct pcdp_device *) (pcdp->uart + pcdp->num_uarts); + dev < end; + dev = (struct pcdp_device *) ((u8 *) dev + dev->length)) { + if (dev->flags & PCDP_PRIMARY_CONSOLE) { + if (dev->type == PCDP_CONSOLE_VGA) { + rc = setup_vga_console(dev); + goto out; + } + } + } + +out: + early_memunmap(pcdp, 4096); + return rc; +} diff --git a/drivers/firmware/pcdp.h b/drivers/firmware/pcdp.h new file mode 100644 index 000000000000..e02540571c52 --- /dev/null +++ b/drivers/firmware/pcdp.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Definitions for PCDP-defined console devices + * + * For DIG64_HCDPv10a_01.pdf and DIG64_PCDPv20.pdf (v1.0a and v2.0 resp.), + * please see + * + * (c) Copyright 2002, 2004 Hewlett-Packard Development Company, L.P. + * Khalid Aziz + * Bjorn Helgaas + */ + +#define PCDP_CONSOLE 0 +#define PCDP_DEBUG 1 +#define PCDP_CONSOLE_OUTPUT 2 +#define PCDP_CONSOLE_INPUT 3 + +#define PCDP_UART (0 << 3) +#define PCDP_VGA (1 << 3) +#define PCDP_USB (2 << 3) + +/* pcdp_uart.type and pcdp_device.type */ +#define PCDP_CONSOLE_UART (PCDP_UART | PCDP_CONSOLE) +#define PCDP_DEBUG_UART (PCDP_UART | PCDP_DEBUG) +#define PCDP_CONSOLE_VGA (PCDP_VGA | PCDP_CONSOLE_OUTPUT) +#define PCDP_CONSOLE_USB (PCDP_USB | PCDP_CONSOLE_INPUT) + +/* pcdp_uart.flags */ +#define PCDP_UART_EDGE_SENSITIVE (1 << 0) +#define PCDP_UART_ACTIVE_LOW (1 << 1) +#define PCDP_UART_PRIMARY_CONSOLE (1 << 2) +#define PCDP_UART_IRQ (1 << 6) /* in pci_func for rev < 3 */ +#define PCDP_UART_PCI (1 << 7) /* in pci_func for rev < 3 */ + +struct pcdp_uart { + u8 type; + u8 bits; + u8 parity; + u8 stop_bits; + u8 pci_seg; + u8 pci_bus; + u8 pci_dev; + u8 pci_func; + u64 baud; + struct acpi_generic_address addr; + u16 pci_dev_id; + u16 pci_vendor_id; + u32 gsi; + u32 clock_rate; + u8 pci_prog_intfc; + u8 flags; + u16 conout_index; + u32 reserved; +} __attribute__((packed)); + +#define PCDP_IF_PCI 1 + +/* pcdp_if_pci.trans */ +#define PCDP_PCI_TRANS_IOPORT 0x02 +#define PCDP_PCI_TRANS_MMIO 0x01 + +struct pcdp_if_pci { + u8 interconnect; + u8 reserved; + u16 length; + u8 segment; + u8 bus; + u8 dev; + u8 fun; + u16 dev_id; + u16 vendor_id; + u32 acpi_interrupt; + u64 mmio_tra; + u64 ioport_tra; + u8 flags; + u8 trans; +} __attribute__((packed)); + +struct pcdp_vga { + u8 count; /* address space descriptors */ +} __attribute__((packed)); + +/* pcdp_device.flags */ +#define PCDP_PRIMARY_CONSOLE 1 + +struct pcdp_device { + u8 type; + u8 flags; + u16 length; + u16 efi_index; + /* next data is pcdp_if_pci or pcdp_if_acpi (not yet supported) */ + /* next data is device specific type (currently only pcdp_vga) */ +} __attribute__((packed)); + +struct pcdp { + u8 signature[4]; + u32 length; + u8 rev; /* PCDP v2.0 is rev 3 */ + u8 chksum; + u8 oemid[6]; + u8 oem_tabid[8]; + u32 oem_rev; + u8 creator_id[4]; + u32 creator_rev; + u32 num_uarts; + struct pcdp_uart uart[]; /* actual size is num_uarts */ + /* remainder of table is pcdp_device structures */ +} __attribute__((packed)); diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c index 129e2b91dbfe..f6d278df6686 100644 --- a/drivers/gpu/drm/drm_ioc32.c +++ b/drivers/gpu/drm/drm_ioc32.c @@ -336,11 +336,11 @@ static struct { DRM_IOCTL32_DEF(DRM_IOCTL_GET_CLIENT, compat_drm_getclient), DRM_IOCTL32_DEF(DRM_IOCTL_GET_STATS, compat_drm_getstats), DRM_IOCTL32_DEF(DRM_IOCTL_SET_UNIQUE, compat_drm_setunique), -#if defined(CONFIG_X86) +#if defined(CONFIG_X86) || defined(CONFIG_IA64) DRM_IOCTL32_DEF(DRM_IOCTL_UPDATE_DRAW, compat_drm_update_draw), #endif DRM_IOCTL32_DEF(DRM_IOCTL_WAIT_VBLANK, compat_drm_wait_vblank), -#if defined(CONFIG_X86) +#if defined(CONFIG_X86) || defined(CONFIG_IA64) DRM_IOCTL32_DEF(DRM_IOCTL_MODE_ADDFB2, compat_drm_mode_addfb2), #endif }; diff --git a/drivers/input/serio/i8042.h b/drivers/input/serio/i8042.h index 5f61672d55b7..adb5173372d3 100644 --- a/drivers/input/serio/i8042.h +++ b/drivers/input/serio/i8042.h @@ -19,7 +19,7 @@ #include "i8042-snirm.h" #elif defined(CONFIG_SPARC) #include "i8042-sparcio.h" -#elif defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) +#elif defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) #include "i8042-acpipnpio.h" #else #include "i8042-io.h" diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 9a29d742617e..36af448fac28 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -95,7 +95,7 @@ config IOMMU_DEBUGFS choice prompt "IOMMU default domain type" depends on IOMMU_API - default IOMMU_DEFAULT_DMA_LAZY if X86 || S390 + default IOMMU_DEFAULT_DMA_LAZY if X86 || IA64 || S390 default IOMMU_DEFAULT_DMA_STRICT help Choose the type of IOMMU domain used to manage DMA API usage by @@ -150,7 +150,7 @@ config OF_IOMMU # IOMMU-agnostic DMA-mapping layer config IOMMU_DMA - def_bool ARM64 || X86 || S390 + def_bool ARM64 || X86 || IA64 || S390 select DMA_OPS select IOMMU_API select IOMMU_IOVA diff --git a/drivers/iommu/intel/Kconfig b/drivers/iommu/intel/Kconfig index 012cd2541a68..f5348b80652b 100644 --- a/drivers/iommu/intel/Kconfig +++ b/drivers/iommu/intel/Kconfig @@ -11,7 +11,7 @@ config DMAR_DEBUG config INTEL_IOMMU bool "Support for Intel IOMMU using DMA Remapping Devices" - depends on PCI_MSI && ACPI && X86 + depends on PCI_MSI && ACPI && (X86 || IA64) select DMA_OPS select IOMMU_API select IOMMU_IOVA diff --git a/drivers/media/cec/platform/Kconfig b/drivers/media/cec/platform/Kconfig index ede81fe331b0..b672d3142eb7 100644 --- a/drivers/media/cec/platform/Kconfig +++ b/drivers/media/cec/platform/Kconfig @@ -99,7 +99,7 @@ config CEC_TEGRA config CEC_SECO tristate "SECO Boards HDMI CEC driver" - depends on X86 || COMPILE_TEST + depends on (X86 || IA64) || COMPILE_TEST depends on PCI && DMI select CEC_CORE select CEC_NOTIFIER diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig index 4fb291f0bf7c..c63c8cb617a3 100644 --- a/drivers/misc/Kconfig +++ b/drivers/misc/Kconfig @@ -166,7 +166,7 @@ config ENCLOSURE_SERVICES config SGI_XP tristate "Support communication between SGI SSIs" depends on NET - depends on X86_UV && SMP + depends on (IA64_SGI_UV || X86_UV) && SMP depends on X86_64 || BROKEN select SGI_GRU if X86_64 && SMP help diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h index 6ae045037219..3ad76cd18b4b 100644 --- a/drivers/misc/sgi-gru/gru.h +++ b/drivers/misc/sgi-gru/gru.h @@ -30,7 +30,9 @@ /* * Size used to map GRU GSeg */ -#if defined(CONFIG_X86_64) +#if defined(CONFIG_IA64) +#define GRU_GSEG_PAGESIZE (256 * 1024UL) +#elif defined(CONFIG_X86_64) #define GRU_GSEG_PAGESIZE (256 * 1024UL) /* ZZZ 2MB ??? */ #else #error "Unsupported architecture" diff --git a/drivers/misc/sgi-gru/gru_instructions.h b/drivers/misc/sgi-gru/gru_instructions.h index da5eb9edf9ec..04d5170ac149 100644 --- a/drivers/misc/sgi-gru/gru_instructions.h +++ b/drivers/misc/sgi-gru/gru_instructions.h @@ -29,7 +29,17 @@ extern void gru_wait_abort_proc(void *cb); * Architecture dependent functions */ -#if defined(CONFIG_X86_64) +#if defined(CONFIG_IA64) +#include +#include +#define __flush_cache(p) ia64_fc((unsigned long)p) +/* Use volatile on IA64 to ensure ordering via st4.rel */ +#define gru_ordered_store_ulong(p, v) \ + do { \ + barrier(); \ + *((volatile unsigned long *)(p)) = v; /* force st.rel */ \ + } while (0) +#elif defined(CONFIG_X86_64) #include #define __flush_cache(p) clflush(p) #define gru_ordered_store_ulong(p, v) \ diff --git a/drivers/misc/sgi-gru/grufile.c b/drivers/misc/sgi-gru/grufile.c index e755690c9805..a3d659c11cc4 100644 --- a/drivers/misc/sgi-gru/grufile.c +++ b/drivers/misc/sgi-gru/grufile.c @@ -337,6 +337,72 @@ static unsigned long gru_chiplet_cpu_to_mmr(int chiplet, int cpu, int *corep) return mmr; } +#ifdef CONFIG_IA64 + +static int gru_irq_count[GRU_CHIPLETS_PER_BLADE]; + +static void gru_noop(struct irq_data *d) +{ +} + +static struct irq_chip gru_chip[GRU_CHIPLETS_PER_BLADE] = { + [0 ... GRU_CHIPLETS_PER_BLADE - 1] { + .irq_mask = gru_noop, + .irq_unmask = gru_noop, + .irq_ack = gru_noop + } +}; + +static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, + irq_handler_t irq_handler, int cpu, int blade) +{ + unsigned long mmr; + int irq = IRQ_GRU + chiplet; + int ret, core; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return 0; + + if (gru_irq_count[chiplet] == 0) { + gru_chip[chiplet].name = irq_name; + ret = irq_set_chip(irq, &gru_chip[chiplet]); + if (ret) { + printk(KERN_ERR "%s: set_irq_chip failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + + ret = request_irq(irq, irq_handler, 0, irq_name, NULL); + if (ret) { + printk(KERN_ERR "%s: request_irq failed, errno=%d\n", + GRU_DRIVER_ID_STR, -ret); + return ret; + } + } + gru_irq_count[chiplet]++; + + return 0; +} + +static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) +{ + unsigned long mmr; + int core, irq = IRQ_GRU + chiplet; + + if (gru_irq_count[chiplet] == 0) + return; + + mmr = gru_chiplet_cpu_to_mmr(chiplet, cpu, &core); + if (mmr == 0) + return; + + if (--gru_irq_count[chiplet] == 0) + free_irq(irq, NULL); +} + +#elif defined CONFIG_X86_64 + static int gru_chiplet_setup_tlb_irq(int chiplet, char *irq_name, irq_handler_t irq_handler, int cpu, int blade) { @@ -381,6 +447,8 @@ static void gru_chiplet_teardown_tlb_irq(int chiplet, int cpu, int blade) } } +#endif + static void gru_teardown_tlb_irqs(void) { int blade; @@ -446,8 +514,12 @@ static int __init gru_init(void) if (!gru_supported()) return 0; +#if defined CONFIG_IA64 + gru_start_paddr = 0xd000000000UL; /* ZZZZZZZZZZZZZZZZZZZ fixme */ +#else gru_start_paddr = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG) & 0x7fffffffffffUL; +#endif gru_start_vaddr = __va(gru_start_paddr); gru_end_paddr = gru_start_paddr + GRU_MAX_BLADES * GRU_SIZE; printk(KERN_INFO "GRU space: 0x%lx - 0x%lx\n", diff --git a/drivers/misc/sgi-gru/gruhandles.c b/drivers/misc/sgi-gru/gruhandles.c index 695316a83b01..1d75d5e540bc 100644 --- a/drivers/misc/sgi-gru/gruhandles.c +++ b/drivers/misc/sgi-gru/gruhandles.c @@ -11,10 +11,16 @@ #include "grutables.h" /* 10 sec */ +#ifdef CONFIG_IA64 +#include +#define GRU_OPERATION_TIMEOUT (((cycles_t) local_cpu_data->itc_freq)*10) +#define CLKS2NSEC(c) ((c) *1000000000 / local_cpu_data->itc_freq) +#else #include #include #define GRU_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) #define CLKS2NSEC(c) ((c) * 1000000 / tsc_khz) +#endif /* Extract the status field from a kernel handle */ #define GET_MSEG_HANDLE_STATUS(h) (((*(unsigned long *)(h)) >> 16) & 3) diff --git a/drivers/misc/sgi-gru/grumain.c b/drivers/misc/sgi-gru/grumain.c index 0f5b09e290c8..4eb4b9455139 100644 --- a/drivers/misc/sgi-gru/grumain.c +++ b/drivers/misc/sgi-gru/grumain.c @@ -41,12 +41,16 @@ struct device *grudev = &gru_device; */ int gru_cpu_fault_map_id(void) { +#ifdef CONFIG_IA64 + return uv_blade_processor_id() % GRU_NUM_TFM; +#else int cpu = smp_processor_id(); int id, core; core = uv_cpu_core_number(cpu); id = core + UV_MAX_INT_CORES * uv_cpu_socket_number(cpu); return id; +#endif } /*--------- ASID Management ------------------------------------------- diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h index 3185711beb07..f1336f43d3bd 100644 --- a/drivers/misc/sgi-xp/xp.h +++ b/drivers/misc/sgi-xp/xp.h @@ -16,7 +16,7 @@ #include -#if defined CONFIG_X86_UV +#if defined CONFIG_X86_UV || defined CONFIG_IA64_SGI_UV #include #endif diff --git a/drivers/misc/sgi-xp/xp_uv.c b/drivers/misc/sgi-xp/xp_uv.c index 3faa7eadf679..19fc7076af27 100644 --- a/drivers/misc/sgi-xp/xp_uv.c +++ b/drivers/misc/sgi-xp/xp_uv.c @@ -18,6 +18,8 @@ #include #if defined CONFIG_X86_64 #include +#elif defined CONFIG_IA64_SGI_UV +#include #endif #include "../sgi-gru/grukservices.h" #include "xp.h" @@ -97,6 +99,17 @@ xp_expand_memprotect_uv(unsigned long phys_addr, unsigned long size) "UV_MEMPROT_ALLOW_RW) failed, ret=%d\n", ret); return xpBiosError; } + +#elif defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_1, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_1,) failed ret=%d\n", ret); + return xpSalError; + } #else #error not a supported configuration #endif @@ -116,6 +129,17 @@ xp_restrict_memprotect_uv(unsigned long phys_addr, unsigned long size) "UV_MEMPROT_RESTRICT_ACCESS) failed, ret=%d\n", ret); return xpBiosError; } + +#elif defined CONFIG_IA64_SGI_UV + u64 nasid_array; + + ret = sn_change_memprotect(phys_addr, size, SN_MEMPROT_ACCESS_CLASS_0, + &nasid_array); + if (ret != 0) { + dev_err(xp, "sn_change_memprotect(,, " + "SN_MEMPROT_ACCESS_CLASS_0,) failed ret=%d\n", ret); + return xpSalError; + } #else #error not a supported configuration #endif diff --git a/drivers/misc/sgi-xp/xpc_main.c b/drivers/misc/sgi-xp/xpc_main.c index 61b66e318488..3186421e82c3 100644 --- a/drivers/misc/sgi-xp/xpc_main.c +++ b/drivers/misc/sgi-xp/xpc_main.c @@ -1153,6 +1153,36 @@ xpc_die_deactivate(void) static int xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) { +#ifdef CONFIG_IA64 /* !!! temporary kludge */ + switch (event) { + case DIE_MACHINE_RESTART: + case DIE_MACHINE_HALT: + xpc_die_deactivate(); + break; + + case DIE_KDEBUG_ENTER: + /* Should lack of heartbeat be ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + fallthrough; + case DIE_MCA_MONARCH_ENTER: + case DIE_INIT_MONARCH_ENTER: + xpc_arch_ops.offline_heartbeat(); + break; + + case DIE_KDEBUG_LEAVE: + /* Is lack of heartbeat being ignored by other partitions? */ + if (!xpc_kdebug_ignore) + break; + + fallthrough; + case DIE_MCA_MONARCH_LEAVE: + case DIE_INIT_MONARCH_LEAVE: + xpc_arch_ops.online_heartbeat(); + break; + } +#else struct die_args *die_args = _die_args; switch (event) { @@ -1174,6 +1204,7 @@ xpc_system_die(struct notifier_block *nb, unsigned long event, void *_die_args) default: xpc_die_deactivate(); } +#endif return NOTIFY_DONE; } diff --git a/drivers/misc/sgi-xp/xpc_uv.c b/drivers/misc/sgi-xp/xpc_uv.c index 2f03a7080d96..fff522d347e3 100644 --- a/drivers/misc/sgi-xp/xpc_uv.c +++ b/drivers/misc/sgi-xp/xpc_uv.c @@ -24,12 +24,34 @@ #include #include #include +#if defined CONFIG_X86_64 #include #include +#elif defined CONFIG_IA64_SGI_UV +#include +#include +#endif #include "../sgi-gru/gru.h" #include "../sgi-gru/grukservices.h" #include "xpc.h" +#if defined CONFIG_IA64_SGI_UV +struct uv_IO_APIC_route_entry { + __u64 vector : 8, + delivery_mode : 3, + dest_mode : 1, + delivery_status : 1, + polarity : 1, + __reserved_1 : 1, + trigger : 1, + mask : 1, + __reserved_2 : 15, + dest : 32; +}; + +#define sn_partition_id 0 +#endif + static struct xpc_heartbeat_uv *xpc_heartbeat_uv; #define XPC_ACTIVATE_MSG_SIZE_UV (1 * GRU_CACHE_LINE_BYTES) @@ -91,6 +113,7 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) { int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); +#if defined CONFIG_X86_64 mq->irq = uv_setup_irq(irq_name, cpu, mq->mmr_blade, mq->mmr_offset, UV_AFFINITY_CPU); if (mq->irq < 0) @@ -98,13 +121,40 @@ xpc_get_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq, int cpu, char *irq_name) mq->mmr_value = uv_read_global_mmr64(mmr_pnode, mq->mmr_offset); +#elif defined CONFIG_IA64_SGI_UV + if (strcmp(irq_name, XPC_ACTIVATE_IRQ_NAME) == 0) + mq->irq = SGI_XPC_ACTIVATE; + else if (strcmp(irq_name, XPC_NOTIFY_IRQ_NAME) == 0) + mq->irq = SGI_XPC_NOTIFY; + else + return -EINVAL; + + mq->mmr_value = (unsigned long)cpu_physical_id(cpu) << 32 | mq->irq; + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mq->mmr_value); +#else + #error not a supported configuration +#endif + return 0; } static void xpc_release_gru_mq_irq_uv(struct xpc_gru_mq_uv *mq) { +#if defined CONFIG_X86_64 uv_teardown_irq(mq->irq); + +#elif defined CONFIG_IA64_SGI_UV + int mmr_pnode; + unsigned long mmr_value; + + mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + mmr_value = 1UL << 16; + + uv_write_global_mmr64(mmr_pnode, mq->mmr_offset, mmr_value); +#else + #error not a supported configuration +#endif } static int @@ -112,6 +162,17 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) { int ret; +#if defined CONFIG_IA64_SGI_UV + int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); + + ret = sn_mq_watchlist_alloc(mmr_pnode, (void *)uv_gpa(mq->address), + mq->order, &mq->mmr_offset); + if (ret < 0) { + dev_err(xpc_part, "sn_mq_watchlist_alloc() failed, ret=%d\n", + ret); + return -EBUSY; + } +#elif defined CONFIG_X86_64 ret = uv_bios_mq_watchlist_alloc(uv_gpa(mq->address), mq->order, &mq->mmr_offset); if (ret < 0) { @@ -119,6 +180,9 @@ xpc_gru_mq_watchlist_alloc_uv(struct xpc_gru_mq_uv *mq) "ret=%d\n", ret); return ret; } +#else + #error not a supported configuration +#endif mq->watchlist_num = ret; return 0; @@ -130,8 +194,15 @@ xpc_gru_mq_watchlist_free_uv(struct xpc_gru_mq_uv *mq) int ret; int mmr_pnode = uv_blade_to_pnode(mq->mmr_blade); +#if defined CONFIG_X86_64 ret = uv_bios_mq_watchlist_free(mmr_pnode, mq->watchlist_num); BUG_ON(ret != BIOS_STATUS_SUCCESS); +#elif defined CONFIG_IA64_SGI_UV + ret = sn_mq_watchlist_free(mmr_pnode, mq->watchlist_num); + BUG_ON(ret != SALRET_OK); +#else + #error not a supported configuration +#endif } static struct xpc_gru_mq_uv * @@ -715,6 +786,7 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, s64 status; enum xp_retval ret; +#if defined CONFIG_X86_64 status = uv_bios_reserved_page_pa((u64)buf, cookie, (u64 *)rp_pa, (u64 *)len); if (status == BIOS_STATUS_SUCCESS) @@ -724,6 +796,19 @@ xpc_get_partition_rsvd_page_pa_uv(void *buf, u64 *cookie, unsigned long *rp_pa, else ret = xpBiosError; +#elif defined CONFIG_IA64_SGI_UV + status = sn_partition_reserved_page_pa((u64)buf, cookie, rp_pa, len); + if (status == SALRET_OK) + ret = xpSuccess; + else if (status == SALRET_MORE_PASSES) + ret = xpNeedMoreInfo; + else + ret = xpSalError; + +#else + #error not a supported configuration +#endif + return ret; } diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 04964bbe08cf..ab38bdf15714 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -17124,7 +17124,7 @@ static u32 tg3_calc_dma_bndry(struct tg3 *tp, u32 val) !tg3_flag(tp, PCI_EXPRESS)) goto out; -#if defined(CONFIG_PPC64) || defined(CONFIG_PARISC) +#if defined(CONFIG_PPC64) || defined(CONFIG_IA64) || defined(CONFIG_PARISC) goal = BOUNDARY_MULTI_CACHELINE; #else #if defined(CONFIG_SPARC64) || defined(CONFIG_ALPHA) diff --git a/drivers/net/ethernet/brocade/bna/bnad.h b/drivers/net/ethernet/brocade/bna/bnad.h index 10b1e534030e..627a93ce38ab 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.h +++ b/drivers/net/ethernet/brocade/bna/bnad.h @@ -19,6 +19,7 @@ #include #include +/* Fix for IA64 */ #include #include diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index ed24d6af7487..1d1e183d3a8b 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -233,7 +233,9 @@ static int nx_set_dma_mask(struct netxen_adapter *adapter) cmask = DMA_BIT_MASK(32); if (NX_IS_REVISION_P2(adapter->ahw.revision_id)) { +#ifndef CONFIG_IA64 mask = DMA_BIT_MASK(35); +#endif } else { mask = DMA_BIT_MASK(39); cmask = mask; diff --git a/drivers/pci/vgaarb.c b/drivers/pci/vgaarb.c index 78748e8d2dba..feca96fc4255 100644 --- a/drivers/pci/vgaarb.c +++ b/drivers/pci/vgaarb.c @@ -556,7 +556,7 @@ EXPORT_SYMBOL(vga_put); static bool vga_is_firmware_default(struct pci_dev *pdev) { -#if defined(CONFIG_X86) +#if defined(CONFIG_X86) || defined(CONFIG_IA64) u64 base = screen_info.lfb_base; u64 size = screen_info.lfb_size; struct resource *r; diff --git a/drivers/tty/serial/8250/Kconfig b/drivers/tty/serial/8250/Kconfig index 8b9a2c4902e2..d91924cb9b21 100644 --- a/drivers/tty/serial/8250/Kconfig +++ b/drivers/tty/serial/8250/Kconfig @@ -222,7 +222,7 @@ config SERIAL_8250_EXTENDED config SERIAL_8250_MANY_PORTS bool "Support more than 4 legacy serial ports" - depends on SERIAL_8250_EXTENDED + depends on SERIAL_8250_EXTENDED && !IA64 help Say Y here if you have dumb serial boards other than the four standard COM 1/2/3/4 ports. This may happen if you have an AST diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 407b0d87b7c1..2408c5e46139 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -1047,7 +1047,6 @@ static ssize_t iterate_tty_write(struct tty_ldisc *ld, struct tty_struct *tty, return ret; } -#ifdef CONFIG_PRINT_QUOTA_WARNING /** * tty_write_message - write a message to a certain tty, not just the console. * @tty: the destination tty_struct @@ -1058,8 +1057,6 @@ static ssize_t iterate_tty_write(struct tty_ldisc *ld, struct tty_struct *tty, * needed. * * We must still hold the BTM and test the CLOSING flag for the moment. - * - * This function is DEPRECATED, do not use in new code. */ void tty_write_message(struct tty_struct *tty, char *msg) { @@ -1072,7 +1069,6 @@ void tty_write_message(struct tty_struct *tty, char *msg) tty_write_unlock(tty); } } -#endif static ssize_t file_tty_write(struct file *file, struct kiocb *iocb, struct iov_iter *from) { diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index a2116e135a82..74ada22f6b71 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -1273,7 +1273,7 @@ static void kbd_bh(struct tasklet_struct *unused) } } -#if defined(CONFIG_X86) || defined(CONFIG_ALPHA) ||\ +#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_ALPHA) ||\ defined(CONFIG_MIPS) || defined(CONFIG_PPC) || defined(CONFIG_SPARC) ||\ defined(CONFIG_PARISC) || defined(CONFIG_SUPERH) ||\ (defined(CONFIG_ARM) && defined(CONFIG_KEYBOARD_ATKBD) && !defined(CONFIG_ARCH_RPC)) diff --git a/drivers/video/fbdev/Kconfig b/drivers/video/fbdev/Kconfig index 2d0bcc1d786e..12a211cca372 100644 --- a/drivers/video/fbdev/Kconfig +++ b/drivers/video/fbdev/Kconfig @@ -442,7 +442,7 @@ config FB_VESA config FB_EFI bool "EFI-based Framebuffer Support" - depends on (FB = y) && EFI + depends on (FB = y) && !IA64 && EFI select APERTURE_HELPERS select DRM_PANEL_ORIENTATION_QUIRKS select FB_IOMEM_HELPERS diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 7d22051b15a2..b9f715d73fe8 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -1287,7 +1287,7 @@ config INTEL_MID_WATCHDOG config ITCO_WDT tristate "Intel TCO Timer/Watchdog" - depends on X86 && PCI + depends on (X86 || IA64) && PCI select WATCHDOG_CORE depends on I2C || I2C=n depends on MFD_INTEL_PMC_BXT || !MFD_INTEL_PMC_BXT diff --git a/fs/Kconfig b/fs/Kconfig index 89fdbefd1075..486842abc8be 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -259,7 +259,7 @@ config ARCH_SUPPORTS_HUGETLBFS menuconfig HUGETLBFS bool "HugeTLB file system support" - depends on X86 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN + depends on X86 || IA64 || SPARC64 || ARCH_SUPPORTS_HUGETLBFS || BROKEN depends on (SYSFS || SYSCTL) select MEMFD_CREATE help diff --git a/fs/afs/main.c b/fs/afs/main.c index a14f6013e316..e0df789d79bf 100644 --- a/fs/afs/main.c +++ b/fs/afs/main.c @@ -41,6 +41,8 @@ const char afs_init_sysname[] = "arm_linux26"; const char afs_init_sysname[] = "aarch64_linux26"; #elif defined(CONFIG_X86_32) const char afs_init_sysname[] = "i386_linux26"; +#elif defined(CONFIG_IA64) +const char afs_init_sysname[] = "ia64_linux26"; #elif defined(CONFIG_PPC64) const char afs_init_sysname[] = "ppc64_linux26"; #elif defined(CONFIG_PPC32) diff --git a/fs/xfs/xfs_ioctl32.h b/fs/xfs/xfs_ioctl32.h index 052d0e888c27..c14852362fce 100644 --- a/fs/xfs/xfs_ioctl32.h +++ b/fs/xfs/xfs_ioctl32.h @@ -22,7 +22,7 @@ /* * On intel, even if sizes match, alignment and/or padding may differ. */ -#if defined(CONFIG_X86_64) +#if defined(CONFIG_IA64) || defined(CONFIG_X86_64) #define BROKEN_X86_ALIGNMENT #define __compat_packed __attribute__((packed)) #else diff --git a/include/linux/acpi.h b/include/linux/acpi.h index b7165e52b3c6..8f808c3b0fb7 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -245,7 +245,7 @@ static inline bool acpi_gicc_is_usable(struct acpi_madt_generic_interrupt *gicc) /* the following numa functions are architecture-dependent */ void acpi_numa_slit_init (struct acpi_table_slit *slit); -#if defined(CONFIG_X86) || defined(CONFIG_LOONGARCH) +#if defined(CONFIG_X86) || defined(CONFIG_IA64) || defined(CONFIG_LOONGARCH) void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa); #else static inline void @@ -1115,8 +1115,15 @@ static inline int acpi_get_lps0_constraint(struct device *dev) return ACPI_STATE_UNKNOWN; } #endif /* CONFIG_SUSPEND && CONFIG_X86 */ +#ifndef CONFIG_IA64 void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else +static inline void arch_reserve_mem_area(acpi_physical_address addr, + size_t size) +{ +} +#endif /* CONFIG_X86 */ +#else #define acpi_os_set_prepare_sleep(func, pm1a_ctrl, pm1b_ctrl) do { } while (0) #endif diff --git a/include/linux/efi.h b/include/linux/efi.h index c74f47711f0b..eacae0cf208c 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -359,10 +359,13 @@ void efi_native_runtime_setup(void); * where the UEFI SPEC breaks the line. */ #define NULL_GUID EFI_GUID(0x00000000, 0x0000, 0x0000, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00) +#define MPS_TABLE_GUID EFI_GUID(0xeb9d2d2f, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_TABLE_GUID EFI_GUID(0xeb9d2d30, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define ACPI_20_TABLE_GUID EFI_GUID(0x8868e871, 0xe4f1, 0x11d3, 0xbc, 0x22, 0x00, 0x80, 0xc7, 0x3c, 0x88, 0x81) #define SMBIOS_TABLE_GUID EFI_GUID(0xeb9d2d31, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) #define SMBIOS3_TABLE_GUID EFI_GUID(0xf2fd1544, 0x9794, 0x4a2c, 0x99, 0x2e, 0xe5, 0xbb, 0xcf, 0x20, 0xe3, 0x94) +#define SAL_SYSTEM_TABLE_GUID EFI_GUID(0xeb9d2d32, 0x2d88, 0x11d3, 0x9a, 0x16, 0x00, 0x90, 0x27, 0x3f, 0xc1, 0x4d) +#define HCDP_TABLE_GUID EFI_GUID(0xf951938d, 0x620b, 0x42ef, 0x82, 0x79, 0xa8, 0x4b, 0x79, 0x61, 0x78, 0x98) #define UGA_IO_PROTOCOL_GUID EFI_GUID(0x61a4d49e, 0x6f68, 0x4f1b, 0xb9, 0x22, 0xa8, 0x6e, 0xed, 0x0b, 0x07, 0xa2) #define EFI_GLOBAL_VARIABLE_GUID EFI_GUID(0x8be4df61, 0x93ca, 0x11d2, 0xaa, 0x0d, 0x00, 0xe0, 0x98, 0x03, 0x2b, 0x8c) #define UV_SYSTEM_TABLE_GUID EFI_GUID(0x3b13a7d4, 0x633e, 0x11dd, 0x93, 0xec, 0xda, 0x25, 0x56, 0xd8, 0x95, 0x93) @@ -849,6 +852,10 @@ static inline int efi_range_is_wc(unsigned long start, unsigned long len) return 1; } +#ifdef CONFIG_EFI_PCDP +extern int __init efi_setup_pcdp_console(char *); +#endif + /* * We play games with efi_enabled so that the compiler will, if * possible, remove EFI-related code altogether. diff --git a/include/linux/export-internal.h b/include/linux/export-internal.h index d445705ac13c..528019477734 100644 --- a/include/linux/export-internal.h +++ b/include/linux/export-internal.h @@ -53,7 +53,9 @@ " .previous" "\n" \ ) -#if defined(CONFIG_PARISC) && defined(CONFIG_64BIT) +#ifdef CONFIG_IA64 +#define KSYM_FUNC(name) @fptr(name) +#elif defined(CONFIG_PARISC) && defined(CONFIG_64BIT) #define KSYM_FUNC(name) P%name #else #define KSYM_FUNC(name) name diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bccb3f1f6262..40fc5813cf93 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -37,6 +37,13 @@ extern struct cred init_cred; #define INIT_TASK_COMM "swapper" +/* Attach to the init_task data structure for proper alignment */ +#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK +#define __init_task_data __section(".data..init_task") +#else +#define __init_task_data /**/ +#endif + /* Attach to the thread_info data structure for proper alignment */ #define __init_thread_info __section(".data..init_thread_info") diff --git a/include/linux/mm.h b/include/linux/mm.h index f5a97dec5169..79180ee319bf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -362,6 +362,8 @@ extern unsigned int kobjsize(const void *objp); # define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 +#elif defined(CONFIG_IA64) +# define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_SPARC64) # define VM_SPARC_ADI VM_ARCH_1 /* Uses ADI tag for access control */ # define VM_ARCH_CLEAR VM_SPARC_ADI diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index bfb85fd13e1f..e4597d7edbcf 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -276,7 +276,7 @@ struct kparam_array read-only sections (which is part of respective UNIX ABI on these platforms). So 'const' makes no sense and even causes compile failures with some compilers. */ -#if defined(CONFIG_ALPHA) || defined(CONFIG_PPC64) +#if defined(CONFIG_ALPHA) || defined(CONFIG_IA64) || defined(CONFIG_PPC64) #define __moduleparam_const #else #define __moduleparam_const const diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index 98030accf641..006e18decfad 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -84,6 +84,8 @@ extern const struct raid6_calls raid6_intx1; extern const struct raid6_calls raid6_intx2; extern const struct raid6_calls raid6_intx4; extern const struct raid6_calls raid6_intx8; +extern const struct raid6_calls raid6_intx16; +extern const struct raid6_calls raid6_intx32; extern const struct raid6_calls raid6_mmxx1; extern const struct raid6_calls raid6_mmxx2; extern const struct raid6_calls raid6_sse1x1; diff --git a/include/linux/sched.h b/include/linux/sched.h index ffe8f618ab86..74f55fd07fd6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1836,7 +1836,9 @@ extern void ia64_set_curr_task(int cpu, struct task_struct *p); void yield(void); union thread_union { +#ifndef CONFIG_ARCH_TASK_STRUCT_ON_STACK struct task_struct task; +#endif #ifndef CONFIG_THREAD_INFO_IN_TASK struct thread_info thread_info; #endif diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 4b7664c56208..dab78538a238 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -304,11 +304,20 @@ static inline void kernel_signal_stop(void) schedule(); } +#ifdef __ia64__ +# define ___ARCH_SI_IA64(_a1, _a2, _a3) , _a1, _a2, _a3 +#else +# define ___ARCH_SI_IA64(_a1, _a2, _a3) +#endif -int force_sig_fault_to_task(int sig, int code, void __user *addr, - struct task_struct *t); -int force_sig_fault(int sig, int code, void __user *addr); -int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t); +int force_sig_fault_to_task(int sig, int code, void __user *addr + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) + , struct task_struct *t); +int force_sig_fault(int sig, int code, void __user *addr + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)); +int send_sig_fault(int sig, int code, void __user *addr + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) + , struct task_struct *t); int force_sig_mceerr(int code, void __user *, short); int send_sig_mceerr(int code, void __user *, short, struct task_struct *); diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h index d801409b33cf..1478b9dd05fa 100644 --- a/include/trace/events/mmflags.h +++ b/include/trace/events/mmflags.h @@ -144,7 +144,7 @@ IF_HAVE_PG_ARCH_X(arch_3) #define __VM_ARCH_SPECIFIC_1 {VM_PAT, "pat" } #elif defined(CONFIG_PPC) #define __VM_ARCH_SPECIFIC_1 {VM_SAO, "sao" } -#elif defined(CONFIG_PARISC) +#elif defined(CONFIG_PARISC) || defined(CONFIG_IA64) #define __VM_ARCH_SPECIFIC_1 {VM_GROWSUP, "growsup" } #elif !defined(CONFIG_MMU) #define __VM_ARCH_SPECIFIC_1 {VM_MAPPED_COPY,"mappedcopy" } diff --git a/include/uapi/asm-generic/siginfo.h b/include/uapi/asm-generic/siginfo.h index b7bc545ec3b2..0f52d0ac47c5 100644 --- a/include/uapi/asm-generic/siginfo.h +++ b/include/uapi/asm-generic/siginfo.h @@ -68,6 +68,11 @@ union __sifields { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS, SIGTRAP, SIGEMT */ struct { void __user *_addr; /* faulting insn/memory ref. */ +#ifdef __ia64__ + int _imm; /* immediate value for "break" */ + unsigned int _flags; /* see ia64 si_flags */ + unsigned long _isr; /* isr */ +#endif #define __ADDR_BND_PKEY_PAD (__alignof__(void *) < sizeof(short) ? \ sizeof(short) : __alignof__(void *)) diff --git a/init/Kconfig b/init/Kconfig index bee58f7468c3..01252cbd9d31 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -1794,7 +1794,7 @@ config KALLSYMS_ABSOLUTE_PERCPU config KALLSYMS_BASE_RELATIVE bool depends on KALLSYMS - default y + default !IA64 help Instead of emitting them as absolute values in the native word size, emit the symbol references in the kallsyms table as 32-bit entries, diff --git a/init/init_task.c b/init/init_task.c index 7ecb458eb3da..56220898a256 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -52,7 +52,8 @@ static struct sighand_struct init_sighand = { }; #ifdef CONFIG_SHADOW_CALL_STACK -unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = { +unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] + __init_task_data = { [(SCS_SIZE / sizeof(long)) - 1] = SCS_END_MAGIC }; #endif @@ -61,7 +62,12 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = { * Set up the first task table, touch at your own risk!. Base=0, * limit=0x1fffff (=2MB) */ -struct task_struct init_task __aligned(L1_CACHE_BYTES) = { +struct task_struct init_task +#ifdef CONFIG_ARCH_TASK_STRUCT_ON_STACK + __init_task_data +#endif + __aligned(L1_CACHE_BYTES) += { #ifdef CONFIG_THREAD_INFO_IN_TASK .thread_info = INIT_THREAD_INFO(init_task), .stack_refcount = REFCOUNT_INIT(1), diff --git a/kernel/cpu.c b/kernel/cpu.c index e6ec3ba4950b..78bf05445e92 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -1743,6 +1743,9 @@ static int cpu_up(unsigned int cpu, enum cpuhp_state target) if (!cpu_possible(cpu)) { pr_err("can't online cpu %d because it is not configured as may-hotadd at boot time\n", cpu); +#if defined(CONFIG_IA64) + pr_err("please check additional_cpus= boot parameter\n"); +#endif return -EINVAL; } diff --git a/kernel/fork.c b/kernel/fork.c index 0d944e92a43f..5c14448225f8 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -167,6 +167,7 @@ void __weak arch_release_task_struct(struct task_struct *tsk) { } +#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static struct kmem_cache *task_struct_cachep; static inline struct task_struct *alloc_task_struct_node(int node) @@ -178,6 +179,9 @@ static inline void free_task_struct(struct task_struct *tsk) { kmem_cache_free(task_struct_cachep, tsk); } +#endif + +#ifndef CONFIG_ARCH_THREAD_STACK_ALLOCATOR /* * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a @@ -410,6 +414,24 @@ void thread_stack_cache_init(void) } # endif /* THREAD_SIZE >= PAGE_SIZE || defined(CONFIG_VMAP_STACK) */ +#else /* CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ + +static int alloc_thread_stack_node(struct task_struct *tsk, int node) +{ + unsigned long *stack; + + stack = arch_alloc_thread_stack_node(tsk, node); + tsk->stack = stack; + return stack ? 0 : -ENOMEM; +} + +static void free_thread_stack(struct task_struct *tsk) +{ + arch_free_thread_stack(tsk); + tsk->stack = NULL; +} + +#endif /* !CONFIG_ARCH_THREAD_STACK_ALLOCATOR */ /* SLAB cache for signal_struct structures (tsk->signal) */ static struct kmem_cache *signal_cachep; @@ -1019,6 +1041,7 @@ static void set_max_threads(unsigned int max_threads_suggested) int arch_task_struct_size __read_mostly; #endif +#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR static void task_struct_whitelist(unsigned long *offset, unsigned long *size) { /* Fetch thread_struct whitelist for the architecture. */ @@ -1033,10 +1056,12 @@ static void task_struct_whitelist(unsigned long *offset, unsigned long *size) else *offset += offsetof(struct task_struct, thread); } +#endif /* CONFIG_ARCH_TASK_STRUCT_ALLOCATOR */ void __init fork_init(void) { int i; +#ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR #ifndef ARCH_MIN_TASKALIGN #define ARCH_MIN_TASKALIGN 0 #endif @@ -1049,6 +1074,7 @@ void __init fork_init(void) arch_task_struct_size, align, SLAB_PANIC|SLAB_ACCOUNT, useroffset, usersize, NULL); +#endif /* do the arch specific task caches init */ arch_task_cache_init(); @@ -3137,7 +3163,7 @@ static inline bool clone3_stack_valid(struct kernel_clone_args *kargs) if (!access_ok((void __user *)kargs->stack, kargs->stack_size)) return false; -#if !defined(CONFIG_STACK_GROWSUP) +#if !defined(CONFIG_STACK_GROWSUP) && !defined(CONFIG_IA64) kargs->stack += kargs->stack_size; #endif } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 9116bcc90346..af45abac9784 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -10278,9 +10278,9 @@ void normalize_rt_tasks(void) #endif /* CONFIG_MAGIC_SYSRQ */ -#if defined(CONFIG_KGDB_KDB) +#if defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) /* - * These functions are only useful for kdb. + * These functions are only useful for the IA64 MCA handling, or kdb. * * They can only be called when the whole system has been * stopped - every CPU needs to be quiescent, and no scheduling @@ -10302,7 +10302,30 @@ struct task_struct *curr_task(int cpu) return cpu_curr(cpu); } -#endif /* defined(CONFIG_KGDB_KDB) */ +#endif /* defined(CONFIG_IA64) || defined(CONFIG_KGDB_KDB) */ + +#ifdef CONFIG_IA64 +/** + * ia64_set_curr_task - set the current task for a given CPU. + * @cpu: the processor in question. + * @p: the task pointer to set. + * + * Description: This function must only be used when non-maskable interrupts + * are serviced on a separate stack. It allows the architecture to switch the + * notion of the current task on a CPU in a non-blocking manner. This function + * must be called with all CPU's synchronized, and interrupts disabled, the + * and caller must save the original value of the current task (see + * curr_task() above) and restore that value before reenabling interrupts and + * re-starting the system. + * + * ONLY VALID WHEN THE WHOLE SYSTEM IS STOPPED! + */ +void ia64_set_curr_task(int cpu, struct task_struct *p) +{ + cpu_curr(cpu) = p; +} + +#endif #ifdef CONFIG_CGROUP_SCHED /* task_group_lock serializes the addition/removal of task groups */ diff --git a/kernel/signal.c b/kernel/signal.c index c9c57d053ce4..de4794ef0f57 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1713,8 +1713,9 @@ void force_sigsegv(int sig) force_sig(SIGSEGV); } -int force_sig_fault_to_task(int sig, int code, void __user *addr, - struct task_struct *t) +int force_sig_fault_to_task(int sig, int code, void __user *addr + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) + , struct task_struct *t) { struct kernel_siginfo info; @@ -1723,15 +1724,24 @@ int force_sig_fault_to_task(int sig, int code, void __user *addr, info.si_errno = 0; info.si_code = code; info.si_addr = addr; +#ifdef __ia64__ + info.si_imm = imm; + info.si_flags = flags; + info.si_isr = isr; +#endif return force_sig_info_to_task(&info, t, HANDLER_CURRENT); } -int force_sig_fault(int sig, int code, void __user *addr) +int force_sig_fault(int sig, int code, void __user *addr + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)) { - return force_sig_fault_to_task(sig, code, addr, current); + return force_sig_fault_to_task(sig, code, addr + ___ARCH_SI_IA64(imm, flags, isr), current); } -int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t) +int send_sig_fault(int sig, int code, void __user *addr + ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr) + , struct task_struct *t) { struct kernel_siginfo info; @@ -1740,6 +1750,11 @@ int send_sig_fault(int sig, int code, void __user *addr, struct task_struct *t) info.si_errno = 0; info.si_code = code; info.si_addr = addr; +#ifdef __ia64__ + info.si_imm = imm; + info.si_flags = flags; + info.si_isr = isr; +#endif return send_sig_info(info.si_signo, &info, t); } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 157f7ce2942d..2b6585751891 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1939,6 +1939,15 @@ static struct ctl_table kern_table[] = { .proc_handler = proc_dointvec, }, #endif +#ifdef CONFIG_IA64 + { + .procname = "unaligned-dump-stack", + .data = &unaligned_dump_stack, + .maxlen = sizeof (int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, +#endif #ifdef CONFIG_RT_MUTEXES { .procname = "max_lock_depth", diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index ef36b829ae1f..a421b1cbf160 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -763,7 +763,7 @@ config SHRINKER_DEBUG config DEBUG_STACK_USAGE bool "Stack utilization instrumentation" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !IA64 help Enables the display of the minimum amount of free stack which each task has ever had available in the sysrq-T and sysrq-P debug output. diff --git a/lib/decompress_unxz.c b/lib/decompress_unxz.c index 842894158944..353268b9f129 100644 --- a/lib/decompress_unxz.c +++ b/lib/decompress_unxz.c @@ -133,6 +133,9 @@ #ifdef CONFIG_ARM # define XZ_DEC_ARM #endif +#ifdef CONFIG_IA64 +# define XZ_DEC_IA64 +#endif #ifdef CONFIG_SPARC # define XZ_DEC_SPARC #endif diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 1c5420ff254e..035b0a4db476 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -2,7 +2,7 @@ obj-$(CONFIG_RAID6_PQ) += raid6_pq.o raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ - int8.o + int8.o int16.o int32.o raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o \ @@ -55,7 +55,7 @@ endif quiet_cmd_unroll = UNROLL $@ cmd_unroll = $(AWK) -v N=$* -f $(srctree)/$(src)/unroll.awk < $< > $@ -targets += int1.c int2.c int4.c int8.c +targets += int1.c int2.c int4.c int8.c int16.c int32.c $(obj)/int%.c: $(src)/int.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index cd2e88ee1f14..0ec534faf019 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -80,6 +80,10 @@ const struct raid6_calls * const raid6_algos[] = { #ifdef CONFIG_CPU_HAS_LSX &raid6_lsx, #endif +#endif +#if defined(__ia64__) + &raid6_intx32, + &raid6_intx16, #endif &raid6_intx8, &raid6_intx4, diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 1ba56c3fa482..558aeac9342a 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -41,6 +41,13 @@ typedef u32 unative_t; +/* + * IA-64 wants insane amounts of unrolling. On other architectures that + * is just a waste of space. + */ +#if ($# <= 8) || defined(__ia64__) + + /* * These sub-operations are separate inlines since they can sometimes be * specially optimized using architecture-specific hacks. @@ -145,3 +152,5 @@ const struct raid6_calls raid6_intx$# = { "int" NSTRING "x$#", 0 }; + +#endif diff --git a/lib/xz/Kconfig b/lib/xz/Kconfig index aef086a6bf2f..adce22ac18d6 100644 --- a/lib/xz/Kconfig +++ b/lib/xz/Kconfig @@ -19,6 +19,11 @@ config XZ_DEC_POWERPC default y select XZ_DEC_BCJ +config XZ_DEC_IA64 + bool "IA-64 BCJ filter decoder" if EXPERT + default y + select XZ_DEC_BCJ + config XZ_DEC_ARM bool "ARM BCJ filter decoder" if EXPERT default y diff --git a/mm/Kconfig b/mm/Kconfig index ffc3a2ba3a8c..25bf2b199fb3 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -732,7 +732,7 @@ config DEFAULT_MMAP_MIN_ADDR from userspace allocation. Keeping a user from writing to low pages can help reduce the impact of kernel NULL pointer bugs. - For most ppc64 and x86 users with lots of address space + For most ia64, ppc64 and x86 users with lots of address space a value of 65536 is reasonable and should cause no problems. On arm and other archs it should not be higher than 32768. Programs which use vm86 functionality or have some need to map diff --git a/mm/memory.c b/mm/memory.c index 0bfc8b007c01..84b6a3f6355e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -123,7 +123,9 @@ static bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) /* * A number of key systems in x86 including ioremap() rely on the assumption * that high_memory defines the upper bound on direct map memory, then end - * of ZONE_NORMAL. + * of ZONE_NORMAL. Under CONFIG_DISCONTIG this means that max_low_pfn and + * highstart_pfn must be the same; there must be no gap between ZONE_NORMAL + * and ZONE_HIGHMEM. */ void *high_memory; EXPORT_SYMBOL(high_memory); diff --git a/mm/mm_init.c b/mm/mm_init.c index 2c19f5515e36..abd517302d75 100644 --- a/mm/mm_init.c +++ b/mm/mm_init.c @@ -1473,7 +1473,8 @@ void __init set_pageblock_order(void) /* * Assume the largest contiguous order of interest is a huge page. - * This value may be variable depending on boot parameters on powerpc. + * This value may be variable depending on boot parameters on IA64 and + * powerpc. */ pageblock_order = order; } @@ -1634,8 +1635,8 @@ void __init *memmap_alloc(phys_addr_t size, phys_addr_t align, #ifdef CONFIG_FLATMEM static void __init alloc_node_mem_map(struct pglist_data *pgdat) { - unsigned long start, offset, size, end; - struct page *map; + unsigned long __maybe_unused start = 0; + unsigned long __maybe_unused offset = 0; /* Skip empty nodes */ if (!pgdat->node_spanned_pages) @@ -1643,24 +1644,34 @@ static void __init alloc_node_mem_map(struct pglist_data *pgdat) start = pgdat->node_start_pfn & ~(MAX_ORDER_NR_PAGES - 1); offset = pgdat->node_start_pfn - start; - /* - * The zone's endpoints aren't required to be MAX_PAGE_ORDER - * aligned but the node_mem_map endpoints must be in order - * for the buddy allocator to function correctly. - */ - end = ALIGN(pgdat_end_pfn(pgdat), MAX_ORDER_NR_PAGES); - size = (end - start) * sizeof(struct page); - map = memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, - pgdat->node_id, false); - if (!map) - panic("Failed to allocate %ld bytes for node %d memory map\n", - size, pgdat->node_id); - pgdat->node_mem_map = map + offset; + /* ia64 gets its own node_mem_map, before this, without bootmem */ + if (!pgdat->node_mem_map) { + unsigned long size, end; + struct page *map; + + /* + * The zone's endpoints aren't required to be MAX_ORDER + * aligned but the node_mem_map endpoints must be in order + * for the buddy allocator to function correctly. + */ + end = pgdat_end_pfn(pgdat); + end = ALIGN(end, MAX_ORDER_NR_PAGES); + size = (end - start) * sizeof(struct page); + map = memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT, + pgdat->node_id, false); + if (!map) + panic("Failed to allocate %ld bytes for node %d memory map\n", + size, pgdat->node_id); + pgdat->node_mem_map = map + offset; + } + pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", - __func__, pgdat->node_id, (unsigned long)pgdat, - (unsigned long)pgdat->node_mem_map); + __func__, pgdat->node_id, (unsigned long)pgdat, + (unsigned long)pgdat->node_mem_map); #ifndef CONFIG_NUMA - /* the global mem_map is just set as node 0's */ + /* + * With no DISCONTIG, the global mem_map is just set as node 0's + */ if (pgdat == NODE_DATA(0)) { mem_map = NODE_DATA(0)->node_mem_map; if (page_to_pfn(mem_map) != pgdat->node_start_pfn) diff --git a/mm/mmap.c b/mm/mmap.c index 3281287771c9..f6b4a28e8dfe 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1962,9 +1962,9 @@ static int acct_stack_growth(struct vm_area_struct *vma, return 0; } -#if defined(CONFIG_STACK_GROWSUP) +#if defined(CONFIG_STACK_GROWSUP) || defined(CONFIG_IA64) /* - * PA-RISC uses this for its stack. + * PA-RISC uses this for its stack; IA64 for its Register Backing Store. * vma is the last one with address > vma->vm_end. Have to extend vma. */ static int expand_upwards(struct vm_area_struct *vma, unsigned long address) @@ -2061,7 +2061,7 @@ static int expand_upwards(struct vm_area_struct *vma, unsigned long address) validate_mm(mm); return error; } -#endif /* CONFIG_STACK_GROWSUP */ +#endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */ /* * vma is the first one with address < vma->vm_start. Have to extend vma. @@ -2220,7 +2220,42 @@ struct vm_area_struct *find_extend_vma_locked(struct mm_struct *mm, unsigned lon } #endif -#if defined(CONFIG_STACK_GROWSUP) +/* + * IA64 has some horrid mapping rules: it can expand both up and down, + * but with various special rules. + * + * We'll get rid of this architecture eventually, so the ugliness is + * temporary. + */ +#ifdef CONFIG_IA64 +static inline bool vma_expand_ok(struct vm_area_struct *vma, unsigned long addr) +{ + return REGION_NUMBER(addr) == REGION_NUMBER(vma->vm_start) && + REGION_OFFSET(addr) < RGN_MAP_LIMIT; +} + +/* + * IA64 stacks grow down, but there's a special register backing store + * that can grow up. Only sequentially, though, so the new address must + * match vm_end. + */ +static inline int vma_expand_up(struct vm_area_struct *vma, unsigned long addr) +{ + if (!vma_expand_ok(vma, addr)) + return -EFAULT; + if (vma->vm_end != (addr & PAGE_MASK)) + return -EFAULT; + return expand_upwards(vma, addr); +} + +static inline bool vma_expand_down(struct vm_area_struct *vma, unsigned long addr) +{ + if (!vma_expand_ok(vma, addr)) + return -EFAULT; + return expand_downwards(vma, addr); +} + +#elif defined(CONFIG_STACK_GROWSUP) #define vma_expand_up(vma,addr) expand_upwards(vma, addr) #define vma_expand_down(vma, addr) (-EFAULT) diff --git a/mm/page_owner.c b/mm/page_owner.c index 5634e5d890f8..1256b3847db2 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -121,6 +121,7 @@ static noinline depot_stack_handle_t save_stack(gfp_t flags) * Sometimes page metadata allocation tracking requires more * memory to be allocated: * - when new stack trace is saved to stack depot + * - when backtrace itself is calculated (ia64) */ if (current->in_page_owner) return dummy_handle; diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h new file mode 100644 index 000000000000..6fffe5682713 --- /dev/null +++ b/tools/arch/ia64/include/asm/barrier.h @@ -0,0 +1,59 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copied from the kernel sources to tools/: + * + * Memory barrier definitions. This is based on information published + * in the Processor Abstraction Layer and the System Abstraction Layer + * manual. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang + * Copyright (C) 1999 Asit Mallick + * Copyright (C) 1999 Don Dugger + */ +#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H +#define _TOOLS_LINUX_ASM_IA64_BARRIER_H + +#include + +/* + * Macros to force memory ordering. In these descriptions, "previous" + * and "subsequent" refer to program order; "visible" means that all + * architecturally visible effects of a memory access have occurred + * (at a minimum, this means the memory has been read or written). + * + * wmb(): Guarantees that all preceding stores to memory- + * like regions are visible before any subsequent + * stores and that all following stores will be + * visible only after all previous stores. + * rmb(): Like wmb(), but for reads. + * mb(): wmb()/rmb() combo, i.e., all previous memory + * accesses are visible before all subsequent + * accesses and vice versa. This is also known as + * a "fence." + * + * Note: "mb()" and its variants cannot be used as a fence to order + * accesses to memory mapped I/O registers. For that, mf.a needs to + * be used. However, we don't want to always use mf.a because (a) + * it's (presumably) much slower than mf and (b) mf.a is supported for + * sequential memory pages only. + */ + +#define mb() ia64_mf() +#define rmb() mb() +#define wmb() mb() + +#define smp_store_release(p, v) \ +do { \ + barrier(); \ + WRITE_ONCE(*p, v); \ +} while (0) + +#define smp_load_acquire(p) \ +({ \ + typeof(*p) ___p1 = READ_ONCE(*p); \ + barrier(); \ + ___p1; \ +}) + +#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */ diff --git a/tools/arch/ia64/include/uapi/asm/bitsperlong.h b/tools/arch/ia64/include/uapi/asm/bitsperlong.h new file mode 100644 index 000000000000..1146d55563db --- /dev/null +++ b/tools/arch/ia64/include/uapi/asm/bitsperlong.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef __ASM_IA64_BITSPERLONG_H +#define __ASM_IA64_BITSPERLONG_H + +#define __BITS_PER_LONG 64 + +#include + +#endif /* __ASM_IA64_BITSPERLONG_H */ diff --git a/tools/arch/ia64/include/uapi/asm/mman.h b/tools/arch/ia64/include/uapi/asm/mman.h new file mode 100644 index 000000000000..2a19bb1db4ab --- /dev/null +++ b/tools/arch/ia64/include/uapi/asm/mman.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H +#define TOOLS_ARCH_IA64_UAPI_ASM_MMAN_FIX_H +#include +/* MAP_32BIT is undefined on ia64, fix it for perf */ +#define MAP_32BIT 0 +#endif diff --git a/usr/include/Makefile b/usr/include/Makefile index 338c81f1fcf3..07796df0a295 100644 --- a/usr/include/Makefile +++ b/usr/include/Makefile @@ -59,6 +59,12 @@ ifeq ($(SRCARCH),arc) no-header-test += linux/bpf_perf_event.h endif +ifeq ($(SRCARCH),ia64) +no-header-test += asm/setup.h +no-header-test += asm/sigcontext.h +no-header-test += linux/if_bonding.h +endif + ifeq ($(SRCARCH),powerpc) no-header-test += linux/bpf_perf_event.h endif