# --- T2-COPYRIGHT-BEGIN ---
# t2/package/*/glibc/0000.patch.ia64
# Copyright (C) 2025 The T2 SDE Project
# SPDX-License-Identifier: GPL-2.0 or patched project license
# --- T2-COPYRIGHT-END ---
#
# Original patch retrieved from: https://github.com/linux-ia64/glibc-ia64.git
#
# Command used: $ git diff origin/release/2.42/master origin/release/2.42/master-epic
# Commit ID reference:
# origin/release/2.42/master: bdea6c37197a3c9bd976911cce5f580dea1c28dd
# origin/release/2.42/master-epic: 25a85da089bf3cba49c38cf177ec97cbae171ae5
diff --git a/INSTALL b/INSTALL
index 1fd8d561b0..bc1bd3f972 100644
--- a/INSTALL
+++ b/INSTALL
@@ -619,15 +619,17 @@ Specific advice for GNU/Linux systems
If you are installing the GNU C Library on GNU/Linux systems, you need
to have the header files from a 3.2 or newer kernel around for
-reference. These headers must be installed using 'make
-headers_install'; the headers present in the kernel source directory are
-not suitable for direct use by the GNU C Library. You do not need to
-use that kernel, just have its headers installed where the GNU C Library
-can access them, referred to here as INSTALL-DIRECTORY. The easiest way
-to do this is to unpack it in a directory such as
-'/usr/src/linux-VERSION'. In that directory, run 'make headers_install
-INSTALL_HDR_PATH=INSTALL-DIRECTORY'. Finally, configure the GNU C
-Library with the option '--with-headers=INSTALL-DIRECTORY/include'. Use
+reference. (For the ia64 architecture, you need version 3.2.18 or newer
+because this is the first version with support for the 'accept4' system
+call.) These headers must be installed using 'make headers_install';
+the headers present in the kernel source directory are not suitable for
+direct use by the GNU C Library. You do not need to use that kernel,
+just have its headers installed where the GNU C Library can access them,
+referred to here as INSTALL-DIRECTORY. The easiest way to do this is to
+unpack it in a directory such as '/usr/src/linux-VERSION'. In that
+directory, run 'make headers_install
+INSTALL_HDR_PATH=INSTALL-DIRECTORY'. Finally, configure the GNU C
+Library with the option '--with-headers=INSTALL-DIRECTORY/include'. Use
the most recent kernel you can get your hands on. (If you are
cross-compiling the GNU C Library, you need to specify
'ARCH=ARCHITECTURE' in the 'make headers_install' command, where
diff --git a/NEWS b/NEWS
index f0b0e924a4..beaf84dc75 100644
--- a/NEWS
+++ b/NEWS
@@ -790,8 +790,6 @@ Deprecated and removed features, and other changes affecting compatibility:
replacement library or its dependencies use dynamic TLS instead of
initial-exec TLS.
-* The ia64*-*-linux-gnu configurations are no longer supported.
-
Changes to build and runtime requirements:
* Building on LoongArch requires at a minimum binutils 2.41 for vector
diff --git a/README b/README
index 2e360eb70a..9532b97986 100644
--- a/README
+++ b/README
@@ -30,6 +30,7 @@ The GNU C Library supports these configurations for using Linux kernels:
hppa-*-linux-gnu
i[4567]86-*-linux-gnu
x86_64-*-linux-gnu Can build either x86_64 or x32
+ ia64-*-linux-gnu
loongarch64-*-linux-gnu Hardware floating point, LE only.
m68k-*-linux-gnu
microblaze*-*-linux-gnu
diff --git a/conform/data/signal.h-data b/conform/data/signal.h-data
index 4beb3d8515..a871ac1e7b 100644
--- a/conform/data/signal.h-data
+++ b/conform/data/signal.h-data
@@ -24,7 +24,8 @@ type mcontext_t
type ucontext_t
element ucontext_t {ucontext_t*} uc_link
-element ucontext_t sigset_t uc_sigmask
+// Bug 21634: uc_sigmask has wrong type.
+xfail[ia64-linux]-element ucontext_t sigset_t uc_sigmask
element ucontext_t stack_t uc_stack
// Bug 21635: uc_mcontext has wrong type.
xfail[powerpc32-linux]-element ucontext_t mcontext_t uc_mcontext
@@ -137,7 +138,8 @@ constant SIGSTKSZ
type ucontext_t
element ucontext_t {ucontext_t*} uc_link
-element ucontext_t sigset_t uc_sigmask
+// Bug 21634: uc_sigmask has wrong type.
+xfail[ia64-linux]-element ucontext_t sigset_t uc_sigmask
element ucontext_t stack_t uc_stack
// Bug 21635: uc_mcontext has wrong type.
xfail[powerpc32-linux]-element ucontext_t mcontext_t uc_mcontext
diff --git a/conform/data/ucontext.h-data b/conform/data/ucontext.h-data
index 414635f664..c1f3f86ab3 100644
--- a/conform/data/ucontext.h-data
+++ b/conform/data/ucontext.h-data
@@ -4,7 +4,8 @@ type mcontext_t
type ucontext_t
element ucontext_t {ucontext_t*} uc_link
-element ucontext_t sigset_t uc_sigmask
+// Bug 21634: uc_sigmask has wrong type.
+xfail[ia64-linux]-element ucontext_t sigset_t uc_sigmask
element ucontext_t stack_t uc_stack
// Bug 21635: uc_mcontext has wrong type.
xfail[powerpc32-linux]-element ucontext_t mcontext_t uc_mcontext
diff --git a/elf/cache.c b/elf/cache.c
index 89a6a1e49a..ab029d77cb 100644
--- a/elf/cache.c
+++ b/elf/cache.c
@@ -179,6 +179,9 @@ print_entry (const char *lib, int flag, uint64_t hwcap,
case FLAG_SPARC_LIB64:
fputs (",64bit", stdout);
break;
+ case FLAG_IA64_LIB64:
+ fputs (",IA-64", stdout);
+ break;
case FLAG_X8664_LIB64:
fputs (",x86-64", stdout);
break;
diff --git a/elf/dl-fptr.c b/elf/dl-fptr.c
new file mode 100644
index 0000000000..575406ae44
--- /dev/null
+++ b/elf/dl-fptr.c
@@ -0,0 +1,322 @@
+/* Manage function descriptors. Generic version.
+ Copyright (C) 1999-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <libintl.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/param.h>
+#include <sys/mman.h>
+#include <link.h>
+#include <ldsodefs.h>
+#include <elf/dynamic-link.h>
+#include <dl-fptr.h>
+#include <dl-unmap-segments.h>
+#include <atomic.h>
+
+#ifndef ELF_MACHINE_BOOT_FPTR_TABLE_LEN
+/* ELF_MACHINE_BOOT_FPTR_TABLE_LEN should be greater than the number of
+ dynamic symbols in ld.so. */
+# define ELF_MACHINE_BOOT_FPTR_TABLE_LEN 256
+#endif /* ELF_MACHINE_BOOT_FPTR_TABLE_LEN */
+
+#ifndef ELF_MACHINE_LOAD_ADDRESS /* must be defined before this point; used below as ELF_MACHINE_LOAD_ADDRESS (l, local) */
+# error "ELF_MACHINE_LOAD_ADDRESS is not defined."
+#endif /* ELF_MACHINE_LOAD_ADDRESS */
+
+#ifndef COMPARE_AND_SWAP /* overridable; note the (ptr, old, new) order here versus (ptr, new, old) in the primitive below */
+# define COMPARE_AND_SWAP(ptr, old, new) \
+ (catomic_compare_and_exchange_bool_acq (ptr, new, old) == 0)
+#endif /* COMPARE_AND_SWAP; callers in this file treat a true result as "the swap was done" */
+
+ElfW(Addr) _dl_boot_fptr_table [ELF_MACHINE_BOOT_FPTR_TABLE_LEN]; /* defined here but not referenced again in this file */
+
+static struct local /* bookkeeping for the descriptor allocator in this file */
+ {
+ struct fdesc_table *root; /* most recently installed table; older tables follow via ->next */
+ struct fdesc *free_list; /* recycled descriptors, chained through their ip field */
+ unsigned int npages; /* # of pages to allocate for the next table; doubled each time one is created */
+ /* The next two members MUST be consecutive! */
+ struct fdesc_table boot_table; /* presumably boot_fdescs is the storage behind this table's fdesc[] -- confirm against the declaration of struct fdesc_table */
+ struct fdesc boot_fdescs[1024];
+ }
+local =
+ {
+ .root = &local.boot_table, /* start with the statically allocated table */
+ .npages = 2,
+ .boot_table =
+ {
+ .len = sizeof (local.boot_fdescs) / sizeof (local.boot_fdescs[0]), /* i.e. 1024 entries */
+ .first_unused = 0 /* nothing handed out yet */
+ }
+ };
+
+/* Map a new fdesc table and return a pointer to it, with entry 0 already
+ reserved for the caller; returns NULL when the CAS on npages is lost. */
+
+static struct fdesc_table *
+new_fdesc_table (struct local *l, size_t *size)
+{
+ size_t old_npages = l->npages; /* size, in pages, of the table created by this call */
+ size_t new_npages = old_npages + old_npages; /* the table after this one is twice as large */
+ struct fdesc_table *new_table;
+
+ /* No lock is taken in this file; the CAS below arbitrates. If someone
+ has just created a new table, return NULL so that the caller retries. */
+ if (! COMPARE_AND_SWAP (&l->npages, old_npages, new_npages))
+ return (struct fdesc_table *) NULL;
+
+ *size = old_npages * GLRO(dl_pagesize); /* not written when NULL was returned above */
+ new_table = __mmap (NULL, *size,
+ PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0);
+ if (new_table == MAP_FAILED) /* presumably _dl_signal_error does not return, since new_table is dereferenced next -- confirm */
+ _dl_signal_error (errno, NULL, NULL,
+ N_("cannot map pages for fdesc table"));
+
+ new_table->len
+ = (*size - sizeof (*new_table)) / sizeof (struct fdesc); /* entries that fit after the table header */
+ new_table->first_unused = 1; /* entry 0 is handed to the caller */
+ return new_table;
+}
+
+/* Return the address of a descriptor filled with IP and GP; the slot comes from the root table, the free list, or a newly mapped table. */
+static ElfW(Addr)
+make_fdesc (ElfW(Addr) ip, ElfW(Addr) gp)
+{
+ struct fdesc *fdesc = NULL;
+ struct fdesc_table *root;
+ unsigned int old;
+ struct local *l;
+
+ ELF_MACHINE_LOAD_ADDRESS (l, local); /* `local' is only accessed through l in this function */
+
+ retry: /* re-read the root table and start over */
+ root = l->root;
+ while (1)
+ {
+ old = root->first_unused;
+ if (old >= root->len) /* root table is exhausted */
+ break;
+ else if (COMPARE_AND_SWAP (&root->first_unused, old, old + 1))
+ {
+ fdesc = &root->fdesc[old]; /* index OLD was claimed by the CAS above */
+ goto install;
+ }
+ }
+
+ if (l->free_list)
+ {
+ /* Get it from free-list, whose entries are chained through ip. */
+ do /* NOTE(review): fdesc->ip is read before the CAS below; confirm that a concurrent pop and re-push of the same entry (ABA) cannot be missed */
+ {
+ fdesc = l->free_list;
+ if (fdesc == NULL) /* the list was drained in the meantime */
+ goto retry;
+ }
+ while (! COMPARE_AND_SWAP ((ElfW(Addr) *) &l->free_list,
+ (ElfW(Addr)) fdesc, fdesc->ip));
+ }
+ else
+ {
+ /* Create a new fdesc table. */
+ size_t size;
+ struct fdesc_table *new_table = new_fdesc_table (l, &size);
+
+ if (new_table == NULL) /* new_fdesc_table lost its CAS on npages */
+ goto retry;
+
+ new_table->next = root; /* link to the old chain before publishing as root */
+ if (! COMPARE_AND_SWAP ((ElfW(Addr) *) &l->root,
+ (ElfW(Addr)) root,
+ (ElfW(Addr)) new_table))
+ {
+ /* Someone has just installed a new table. Unmap ours and
+ retry so that the installed table is used instead. */
+ __munmap (new_table, size);
+ goto retry;
+ }
+
+ /* Note that the first entry was reserved while allocating the
+ memory for the new page. */
+ fdesc = &new_table->fdesc[0];
+ }
+
+ install: /* reached only after one of the CAS operations above succeeded */
+ fdesc->ip = ip;
+ fdesc->gp = gp;
+
+ return (ElfW(Addr)) fdesc;
+}
+
+/* Map MAP's table of function-descriptor addresses (one slot per dynamic symbol), try to install it, and return the table that ended up installed. */
+static inline ElfW(Addr) * __attribute__ ((always_inline))
+make_fptr_table (struct link_map *map)
+{
+ const ElfW(Sym) *symtab
+ = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+ const char *strtab = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+ ElfW(Addr) *fptr_table;
+ size_t size;
+ size_t len;
+
+ /* XXX Apparently the only way to find out the size of the dynamic
+ symbol section is to assume that the string table follows right
+ afterwards... */
+ len = ((strtab - (char *) symtab)
+ / map->l_info[DT_SYMENT]->d_un.d_val);
+ size = ((len * sizeof (fptr_table[0]) + GLRO(dl_pagesize) - 1)
+ & -GLRO(dl_pagesize)); /* round up to a multiple of dl_pagesize (mask idiom, assumes a power of two) */
+ /* XXX At the moment we do not support systems without MAP_ANON.
+ There probably are none for IA-64. In case this is proven wrong
+ we will have to open /dev/zero here and use the file descriptor
+ instead of the hard-coded -1. */
+ fptr_table = __mmap (NULL, size,
+ PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE,
+ -1, 0);
+ if (fptr_table == MAP_FAILED)
+ _dl_signal_error (errno, NULL, NULL,
+ N_("cannot map pages for fptr table"));
+
+ if (COMPARE_AND_SWAP ((ElfW(Addr) *) &map->l_mach.fptr_table,
+ (ElfW(Addr)) NULL, (ElfW(Addr)) fptr_table))
+ map->l_mach.fptr_table_len = len; /* NOTE(review): stored after the pointer is published -- confirm a concurrent reader cannot see the table with the previous length */
+ else
+ __munmap (fptr_table, len * sizeof (fptr_table[0])); /* lost the race; the length passed is the unrounded one, not SIZE */
+
+ return map->l_mach.fptr_table;
+}
+
+/* Return the address of the function descriptor recorded for SYM in MAP's fptr table, creating it from IP and MAP's DT_PLTGOT value if the slot is still empty. */
+ElfW(Addr)
+_dl_make_fptr (struct link_map *map, const ElfW(Sym) *sym,
+ ElfW(Addr) ip)
+{
+ ElfW(Addr) *ftab = map->l_mach.fptr_table;
+ const ElfW(Sym) *symtab;
+ Elf_Symndx symidx;
+ struct local *l; /* set only on the lost-race path (and in the disabled trace) */
+
+ if (__glibc_unlikely (ftab == NULL)) /* first descriptor requested for this map */
+ ftab = make_fptr_table (map);
+
+ symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
+ symidx = sym - symtab; /* index of SYM in MAP's dynamic symbol table, also its slot in ftab */
+
+ if (symidx >= map->l_mach.fptr_table_len) /* NOTE(review): the length is stored after the table pointer in make_fptr_table; confirm this cannot fire spuriously under concurrency */
+ _dl_signal_error (0, NULL, NULL,
+ N_("internal error: symidx out of range of fptr table"));
+
+ while (ftab[symidx] == 0) /* repeat until the slot holds a descriptor, ours or another thread's */
+ {
+ /* GOT has already been relocated in elf_get_dynamic_info -
+ don't try to relocate it again. */
+ ElfW(Addr) fdesc
+ = make_fdesc (ip, map->l_info[DT_PLTGOT]->d_un.d_ptr);
+
+ if (__builtin_expect (COMPARE_AND_SWAP (&ftab[symidx], (ElfW(Addr)) NULL,
+ fdesc), 1))
+ {
+ /* No one has updated the entry and the new function
+ descriptor has been installed. */
+#if 0 /* disabled debugging trace */
+ const char *strtab
+ = (const void *) D_PTR (map, l_info[DT_STRTAB]);
+
+ ELF_MACHINE_LOAD_ADDRESS (l, local);
+ if (l->root != &l->boot_table
+ || l->boot_table.first_unused > 20)
+ _dl_debug_printf ("created fdesc symbol `%s' at %lx\n",
+ strtab + sym->st_name, ftab[symidx]);
+#endif
+ break;
+ }
+ else
+ {
+ /* We created a duplicated function descriptor. We put it on
+ free-list. */
+ struct fdesc *f = (struct fdesc *) fdesc;
+
+ ELF_MACHINE_LOAD_ADDRESS (l, local);
+
+ do /* push F: its ip field doubles as the link to the previous list head */
+ f->ip = (ElfW(Addr)) l->free_list;
+ while (! COMPARE_AND_SWAP ((ElfW(Addr) *) &l->free_list,
+ f->ip, fdesc));
+ }
+ }
+
+ return ftab[symidx]; /* the installed descriptor, whichever thread created it */
+}
+
+/* Unmap MAP's segments, then move every descriptor recorded in MAP's fptr table onto the free list and unmap the table itself. */
+void
+_dl_unmap (struct link_map *map)
+{
+ ElfW(Addr) *ftab = map->l_mach.fptr_table;
+ struct fdesc *head = NULL, *tail = NULL;
+ size_t i;
+
+ _dl_unmap_segments (map);
+
+ if (ftab == NULL) /* no descriptor was ever created for this map */
+ return;
+
+ /* String together the fdesc structures that are being freed. */
+ for (i = 0; i < map->l_mach.fptr_table_len; ++i)
+ {
+ if (ftab[i])
+ {
+ *(struct fdesc **) ftab[i] = head; /* stores the link in the descriptor's first word; presumably that is its ip field -- confirm */
+ head = (struct fdesc *) ftab[i];
+ if (tail == NULL) /* the first descriptor found ends up last in the chain */
+ tail = head;
+ }
+ }
+
+ /* Prepend the new list to the free_list: */
+ if (tail) /* `local' is accessed directly here, not through ELF_MACHINE_LOAD_ADDRESS */
+ do
+ tail->ip = (ElfW(Addr)) local.free_list;
+ while (! COMPARE_AND_SWAP ((ElfW(Addr) *) &local.free_list,
+ tail->ip, (ElfW(Addr)) head));
+
+ __munmap (ftab, (map->l_mach.fptr_table_len
+ * sizeof (map->l_mach.fptr_table[0]))); /* length is entries * entry size, not the page-rounded size used when mapping */
+
+ map->l_mach.fptr_table = NULL; /* cleared only after the table has been unmapped */
+}
+
+/* If ADDRESS points at a descriptor handed out from one of the fdesc
+   tables, return that descriptor's ip; otherwise return ADDRESS itself.  */
+ElfW(Addr)
+_dl_lookup_address (const void *address)
+{
+  const ElfW(Addr) wanted = (ElfW(Addr)) address;
+  struct fdesc_table *tab;
+
+  for (tab = local.root; tab != NULL; tab = tab->next)
+    {
+      /* Kept unsigned: an address below the table fails the range test.  */
+      unsigned long int slot = (struct fdesc *) wanted - &tab->fdesc[0];
+
+      if (slot < tab->first_unused
+          && wanted == (ElfW(Addr)) &tab->fdesc[slot])
+        return tab->fdesc[slot].ip;
+    }
+
+  return wanted;
+}
diff --git a/elf/dl-load.c b/elf/dl-load.c
index 00b9da9ec7..787cb2ec42 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -756,7 +756,7 @@ _dl_init_paths (const char *llp, const char *source,
l = GL(dl_ns)[LM_ID_BASE]._ns_loaded;
#ifdef SHARED
if (l == NULL)
- l = &_dl_rtld_map;
+ l = &GL (dl_rtld_map);
#endif
assert (l->l_type != lt_loaded);
@@ -1016,8 +1016,8 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
/* When loading into a namespace other than the base one we must
avoid loading ld.so since there can only be one copy. Ever. */
if (__glibc_unlikely (nsid != LM_ID_BASE)
- && (_dl_file_id_match_p (&id, &_dl_rtld_map.l_file_id)
- || _dl_name_match_p (name, &_dl_rtld_map)))
+ && (_dl_file_id_match_p (&id, &GL(dl_rtld_map).l_file_id)
+ || _dl_name_match_p (name, &GL(dl_rtld_map))))
{
/* This is indeed ld.so. Create a new link_map which refers to
the real one for almost everything. */
@@ -1026,7 +1026,7 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
goto fail_new;
/* Refer to the real descriptor. */
- l->l_real = &_dl_rtld_map;
+ l->l_real = &GL(dl_rtld_map);
/* Copy l_addr and l_ld to avoid a GDB warning with dlmopen(). */
l->l_addr = l->l_real->l_addr;
@@ -2054,7 +2054,7 @@ _dl_map_new_object (struct link_map *loader, const char *name,
l = (loader
?: GL(dl_ns)[LM_ID_BASE]._ns_loaded
# ifdef SHARED
- ?: &_dl_rtld_map
+ ?: &GL(dl_rtld_map)
# endif
);
diff --git a/elf/dl-symaddr.c b/elf/dl-symaddr.c
new file mode 100644
index 0000000000..b0299fdb35
--- /dev/null
+++ b/elf/dl-symaddr.c
@@ -0,0 +1,33 @@
+/* Get the symbol address. Generic version.
+ Copyright (C) 1999-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <ldsodefs.h>
+#include <dl-fptr.h>
+
+void *
+_dl_symbol_address (struct link_map *map, const ElfW(Sym) *ref)
+{
+  /* A function symbol is answered with the result of _dl_make_fptr;
+     any other symbol with the value computed by SYMBOL_ADDRESS.  */
+  const ElfW(Addr) raw = SYMBOL_ADDRESS (map, ref, false);
+
+  if (ELFW(ST_TYPE) (ref->st_info) != STT_FUNC)
+    return (void *) raw;
+  return (void *) _dl_make_fptr (map, ref, raw);
+}
+rtld_hidden_def (_dl_symbol_address)
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
index a083a82933..d0c5840f3e 100644
--- a/elf/dl-tls.c
+++ b/elf/dl-tls.c
@@ -740,6 +740,23 @@ rtld_hidden_def (_dl_deallocate_tls)
#ifdef SHARED
+/* The __tls_get_addr function has two basic forms which differ in the
+ arguments. The IA-64 form takes two parameters, the module ID and
+ offset. The form used, among others, on IA-32 takes a reference to
+ a special structure which contains the same information. The second
+ form seems to be more often used (at the moment) so we default to
+ it. Users of the IA-64 form have to provide adequate definitions
+ of the following macros. */
+# ifndef GET_ADDR_ARGS
+# define GET_ADDR_ARGS tls_index *ti
+# define GET_ADDR_PARAM ti
+# endif
+# ifndef GET_ADDR_MODULE
+# define GET_ADDR_MODULE ti->ti_module
+# endif
+# ifndef GET_ADDR_OFFSET
+# define GET_ADDR_OFFSET ti->ti_offset
+# endif
/* Allocate one DTV entry. */
static struct dtv_pointer
@@ -938,25 +955,25 @@ _dl_update_slotinfo (unsigned long int req_modid, size_t new_gen)
/* Adjust the TLS variable pointer using the TLS descriptor offset and
the ABI-specific offset. */
static inline void *
-tls_get_addr_adjust (void *from_dtv, tls_index *ti)
+tls_get_addr_adjust (void *from_dtv, GET_ADDR_ARGS)
{
/* Perform arithmetic in uintptr_t to avoid pointer wraparound
issues. The outer cast to uintptr_t suppresses a warning about
pointer/integer size mismatch on ILP32 targets with 64-bit
ti_offset. */
- return (void *) (uintptr_t) ((uintptr_t) from_dtv + ti->ti_offset
+ return (void *) (uintptr_t) ((uintptr_t) from_dtv + GET_ADDR_OFFSET
+ TLS_DTV_OFFSET);
}
static void *
__attribute_noinline__
-tls_get_addr_tail (tls_index *ti, dtv_t *dtv, struct link_map *the_map)
+tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
{
/* The allocation was deferred. Do it now. */
if (the_map == NULL)
{
/* Find the link map for this module. */
- size_t idx = ti->ti_module;
+ size_t idx = GET_ADDR_MODULE;
struct dtv_slotinfo_list *listp = GL(dl_tls_dtv_slotinfo_list);
while (idx >= listp->len)
@@ -993,35 +1010,35 @@ tls_get_addr_tail (tls_index *ti, dtv_t *dtv, struct link_map *the_map)
#endif
__rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
- dtv[ti->ti_module].pointer.to_free = NULL;
- dtv[ti->ti_module].pointer.val = p;
+ dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
+ dtv[GET_ADDR_MODULE].pointer.val = p;
- return tls_get_addr_adjust (p, ti);
+ return tls_get_addr_adjust (p, GET_ADDR_PARAM);
}
else
__rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
}
struct dtv_pointer result = allocate_and_init (the_map);
- dtv[ti->ti_module].pointer = result;
+ dtv[GET_ADDR_MODULE].pointer = result;
assert (result.to_free != NULL);
- return tls_get_addr_adjust (result.val, ti);
+ return tls_get_addr_adjust (result.val, GET_ADDR_PARAM);
}
static struct link_map *
__attribute_noinline__
-update_get_addr (tls_index *ti, size_t gen)
+update_get_addr (GET_ADDR_ARGS, size_t gen)
{
- struct link_map *the_map = _dl_update_slotinfo (ti->ti_module, gen);
+ struct link_map *the_map = _dl_update_slotinfo (GET_ADDR_MODULE, gen);
dtv_t *dtv = THREAD_DTV ();
- void *p = dtv[ti->ti_module].pointer.val;
+ void *p = dtv[GET_ADDR_MODULE].pointer.val;
if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
- return tls_get_addr_tail (ti, dtv, the_map);
+ return tls_get_addr_tail (GET_ADDR_PARAM, dtv, the_map);
- return tls_get_addr_adjust (p, ti);
+ return tls_get_addr_adjust (p, GET_ADDR_PARAM);
}
/* For all machines that have a non-macro version of __tls_get_addr, we
@@ -1030,7 +1047,7 @@ update_get_addr (tls_index *ti, size_t gen)
in ld.so for __tls_get_addr. */
#ifndef __tls_get_addr
-extern void * __tls_get_addr (tls_index *ti);
+extern void * __tls_get_addr (GET_ADDR_ARGS);
rtld_hidden_proto (__tls_get_addr)
rtld_hidden_def (__tls_get_addr)
#endif
@@ -1038,7 +1055,7 @@ rtld_hidden_def (__tls_get_addr)
/* The generic dynamic and local dynamic model cannot be used in
statically linked applications. */
void *
-__tls_get_addr (tls_index *ti)
+__tls_get_addr (GET_ADDR_ARGS)
{
dtv_t *dtv = THREAD_DTV ();
@@ -1050,7 +1067,7 @@ __tls_get_addr (tls_index *ti)
if (__glibc_unlikely (dtv[0].counter != gen))
{
if (_dl_tls_allocate_active ()
- && ti->ti_module < _dl_tls_initial_modid_limit)
+ && GET_ADDR_MODULE < _dl_tls_initial_modid_limit)
/* This is a reentrant __tls_get_addr call, but we can
satisfy it because it's an initially-loaded module ID.
These TLS slotinfo slots do not change, so the
@@ -1063,16 +1080,16 @@ __tls_get_addr (tls_index *ti)
/* Update DTV up to the global generation, see CONCURRENCY NOTES
in _dl_update_slotinfo. */
gen = atomic_load_acquire (&GL(dl_tls_generation));
- return update_get_addr (ti, gen);
+ return update_get_addr (GET_ADDR_PARAM, gen);
}
}
- void *p = dtv[ti->ti_module].pointer.val;
+ void *p = dtv[GET_ADDR_MODULE].pointer.val;
if (__glibc_unlikely (p == TLS_DTV_UNALLOCATED))
- return tls_get_addr_tail (ti, dtv, NULL);
+ return tls_get_addr_tail (GET_ADDR_PARAM, dtv, NULL);
- return tls_get_addr_adjust (p, ti);
+ return tls_get_addr_adjust (p, GET_ADDR_PARAM);
}
#endif /* SHARED */
diff --git a/elf/rtld.c b/elf/rtld.c
index 493f9696ea..60e7f852e1 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -379,8 +379,6 @@ struct rtld_global_ro _rtld_global_ro attribute_relro =
extern struct rtld_global_ro _rtld_local_ro
__attribute__ ((alias ("_rtld_global_ro"), visibility ("hidden")));
-struct link_map _dl_rtld_map;
-struct auditstate _dl_rtld_auditstate[DL_NNS];
static void dl_main (const ElfW(Phdr) *phdr, ElfW(Word) phnum,
ElfW(Addr) *user_entry, ElfW(auxv_t) *auxv);
@@ -467,23 +465,24 @@ _dl_start_final (void *arg, struct dl_start_final_info *info)
/* Transfer data about ourselves to the permanent link_map structure. */
#ifndef DONT_USE_BOOTSTRAP_MAP
- _dl_rtld_map.l_addr = info->l.l_addr;
- _dl_rtld_map.l_ld = info->l.l_ld;
- _dl_rtld_map.l_ld_readonly = info->l.l_ld_readonly;
- memcpy (_dl_rtld_map.l_info, info->l.l_info, sizeof _dl_rtld_map.l_info);
- _dl_rtld_map.l_mach = info->l.l_mach;
- _dl_rtld_map.l_relocated = 1;
+ GL(dl_rtld_map).l_addr = info->l.l_addr;
+ GL(dl_rtld_map).l_ld = info->l.l_ld;
+ GL(dl_rtld_map).l_ld_readonly = info->l.l_ld_readonly;
+ memcpy (GL(dl_rtld_map).l_info, info->l.l_info,
+ sizeof GL(dl_rtld_map).l_info);
+ GL(dl_rtld_map).l_mach = info->l.l_mach;
+ GL(dl_rtld_map).l_relocated = 1;
#endif
- _dl_setup_hash (&_dl_rtld_map);
- _dl_rtld_map.l_real = &_dl_rtld_map;
- _dl_rtld_map.l_map_start
+ _dl_setup_hash (&GL(dl_rtld_map));
+ GL(dl_rtld_map).l_real = &GL(dl_rtld_map);
+ GL(dl_rtld_map).l_map_start
= (ElfW(Addr)) DL_ADDRESS_WITHOUT_RELOC (&__ehdr_start);
- _dl_rtld_map.l_map_end
+ GL(dl_rtld_map).l_map_end
= (ElfW(Addr)) DL_ADDRESS_WITHOUT_RELOC (_end);
/* Copy the TLS related data if necessary. */
#ifndef DONT_USE_BOOTSTRAP_MAP
# if NO_TLS_OFFSET != 0
- _dl_rtld_map.l_tls_offset = NO_TLS_OFFSET;
+ GL(dl_rtld_map).l_tls_offset = NO_TLS_OFFSET;
# endif
#endif
@@ -510,7 +509,7 @@ _dl_start_final (void *arg, struct dl_start_final_info *info)
}
#ifdef DONT_USE_BOOTSTRAP_MAP
-# define bootstrap_map _dl_rtld_map
+# define bootstrap_map GL(dl_rtld_map)
#else
# define bootstrap_map info.l
#endif
@@ -1018,8 +1017,8 @@ ERROR: audit interface '%s' requires version %d (maximum supported version %d);
/* The dynamic linker link map is statically allocated, so the
cookie in _dl_new_object has not happened. */
- link_map_audit_state (&_dl_rtld_map, GLRO (dl_naudit))->cookie
- = (intptr_t) &_dl_rtld_map;
+ link_map_audit_state (&GL (dl_rtld_map), GLRO (dl_naudit))->cookie
+ = (intptr_t) &GL (dl_rtld_map);
++GLRO(dl_naudit);
@@ -1046,7 +1045,7 @@ load_audit_modules (struct link_map *main_map, struct audit_list *audit_list)
if (GLRO(dl_naudit) > 0)
{
_dl_audit_objopen (main_map, LM_ID_BASE);
- _dl_audit_objopen (&_dl_rtld_map, LM_ID_BASE);
+ _dl_audit_objopen (&GL(dl_rtld_map), LM_ID_BASE);
}
}
@@ -1056,7 +1055,7 @@ static void
rtld_chain_load (struct link_map *main_map, char *argv0)
{
/* The dynamic loader run against itself. */
- const char *rtld_soname = l_soname (&_dl_rtld_map);
+ const char *rtld_soname = l_soname (&GL(dl_rtld_map));
if (l_soname (main_map) != NULL
&& strcmp (rtld_soname, l_soname (main_map)) == 0)
_dl_fatal_printf ("%s: loader cannot load itself\n", rtld_soname);
@@ -1143,7 +1142,7 @@ rtld_setup_main_map (struct link_map *main_map)
_dl_rtld_libname.name = ((const char *) main_map->l_addr
+ ph->p_vaddr);
/* _dl_rtld_libname.next = NULL; Already zero. */
- _dl_rtld_map.l_libname = &_dl_rtld_libname;
+ GL(dl_rtld_map).l_libname = &_dl_rtld_libname;
has_interp = true;
break;
@@ -1225,16 +1224,16 @@ rtld_setup_main_map (struct link_map *main_map)
= (char *) main_map->l_tls_initimage + main_map->l_addr;
if (! main_map->l_map_end)
main_map->l_map_end = ~0;
- if (! _dl_rtld_map.l_libname && _dl_rtld_map.l_name)
+ if (! GL(dl_rtld_map).l_libname && GL(dl_rtld_map).l_name)
{
/* We were invoked directly, so the program might not have a
PT_INTERP. */
- _dl_rtld_libname.name = _dl_rtld_map.l_name;
+ _dl_rtld_libname.name = GL(dl_rtld_map).l_name;
/* _dl_rtld_libname.next = NULL; Already zero. */
- _dl_rtld_map.l_libname = &_dl_rtld_libname;
+ GL(dl_rtld_map).l_libname = &_dl_rtld_libname;
}
else
- assert (_dl_rtld_map.l_libname); /* How else did we get here? */
+ assert (GL(dl_rtld_map).l_libname); /* How else did we get here? */
return has_interp;
}
@@ -1346,7 +1345,7 @@ dl_main (const ElfW(Phdr) *phdr,
char **orig_argv = _dl_argv;
/* Note the place where the dynamic linker actually came from. */
- _dl_rtld_map.l_name = rtld_progname;
+ GL(dl_rtld_map).l_name = rtld_progname;
while (_dl_argc > 1)
if (! strcmp (_dl_argv[1], "--list"))
@@ -1630,22 +1629,22 @@ dl_main (const ElfW(Phdr) *phdr,
/* If the current libname is different from the SONAME, add the
latter as well. */
{
- const char *soname = l_soname (&_dl_rtld_map);
+ const char *soname = l_soname (&GL(dl_rtld_map));
if (soname != NULL
- && strcmp (_dl_rtld_map.l_libname->name, soname) != 0)
+ && strcmp (GL(dl_rtld_map).l_libname->name, soname) != 0)
{
static struct libname_list newname;
newname.name = soname;
newname.next = NULL;
newname.dont_free = 1;
- assert (_dl_rtld_map.l_libname->next == NULL);
- _dl_rtld_map.l_libname->next = &newname;
+ assert (GL(dl_rtld_map).l_libname->next == NULL);
+ GL(dl_rtld_map).l_libname->next = &newname;
}
}
/* The ld.so must be relocated since otherwise loading audit modules
will fail since they reuse the very same ld.so. */
- assert (_dl_rtld_map.l_relocated);
+ assert (GL(dl_rtld_map).l_relocated);
if (! rtld_is_main)
{
@@ -1675,7 +1674,7 @@ dl_main (const ElfW(Phdr) *phdr,
_exit (has_interp ? 0 : 2);
}
- struct link_map **first_preload = &_dl_rtld_map.l_next;
+ struct link_map **first_preload = &GL(dl_rtld_map).l_next;
/* Set up the data structures for the system-supplied DSO early,
so they can influence _dl_init_paths. */
setup_vdso (main_map, &first_preload);
@@ -1688,20 +1687,20 @@ dl_main (const ElfW(Phdr) *phdr,
call_init_paths (&state);
/* Initialize _r_debug_extended. */
- struct r_debug *r = _dl_debug_initialize (_dl_rtld_map.l_addr,
+ struct r_debug *r = _dl_debug_initialize (GL(dl_rtld_map).l_addr,
LM_ID_BASE);
r->r_state = RT_CONSISTENT;
/* Put the link_map for ourselves on the chain so it can be found by
name. Note that at this point the global chain of link maps contains
exactly one element, which is pointed to by dl_loaded. */
- if (! _dl_rtld_map.l_name)
+ if (! GL(dl_rtld_map).l_name)
/* If not invoked directly, the dynamic linker shared object file was
found by the PT_INTERP name. */
- _dl_rtld_map.l_name = (char *) _dl_rtld_map.l_libname->name;
- _dl_rtld_map.l_type = lt_library;
- main_map->l_next = &_dl_rtld_map;
- _dl_rtld_map.l_prev = main_map;
+ GL(dl_rtld_map).l_name = (char *) GL(dl_rtld_map).l_libname->name;
+ GL(dl_rtld_map).l_type = lt_library;
+ main_map->l_next = &GL(dl_rtld_map);
+ GL(dl_rtld_map).l_prev = main_map;
++GL(dl_ns)[LM_ID_BASE]._ns_nloaded;
++GL(dl_load_adds);
@@ -1719,8 +1718,8 @@ dl_main (const ElfW(Phdr) *phdr,
const ElfW(Phdr) *rtld_phdr = (const void *) rtld_ehdr + rtld_ehdr->e_phoff;
- _dl_rtld_map.l_phdr = rtld_phdr;
- _dl_rtld_map.l_phnum = rtld_ehdr->e_phnum;
+ GL(dl_rtld_map).l_phdr = rtld_phdr;
+ GL(dl_rtld_map).l_phnum = rtld_ehdr->e_phnum;
/* PT_GNU_RELRO is usually the last phdr. */
@@ -1728,15 +1727,15 @@ dl_main (const ElfW(Phdr) *phdr,
while (cnt-- > 0)
if (rtld_phdr[cnt].p_type == PT_GNU_RELRO)
{
- _dl_rtld_map.l_relro_addr = rtld_phdr[cnt].p_vaddr;
- _dl_rtld_map.l_relro_size = rtld_phdr[cnt].p_memsz;
+ GL(dl_rtld_map).l_relro_addr = rtld_phdr[cnt].p_vaddr;
+ GL(dl_rtld_map).l_relro_size = rtld_phdr[cnt].p_memsz;
break;
}
/* Add the dynamic linker to the TLS list if it also uses TLS. */
- if (_dl_rtld_map.l_tls_blocksize != 0)
+ if (GL(dl_rtld_map).l_tls_blocksize != 0)
/* Assign a module ID. Do this before loading any audit modules. */
- _dl_assign_tls_modid (&_dl_rtld_map);
+ _dl_assign_tls_modid (&GL(dl_rtld_map));
audit_list_add_dynamic_tag (&state.audit_list, main_map, DT_AUDIT);
audit_list_add_dynamic_tag (&state.audit_list, main_map, DT_DEPAUDIT);
@@ -1929,9 +1928,9 @@ dl_main (const ElfW(Phdr) *phdr,
main_map->l_searchlist.r_list[--i]->l_global = 1;
/* Remove _dl_rtld_map from the chain. */
- _dl_rtld_map.l_prev->l_next = _dl_rtld_map.l_next;
- if (_dl_rtld_map.l_next != NULL)
- _dl_rtld_map.l_next->l_prev = _dl_rtld_map.l_prev;
+ GL(dl_rtld_map).l_prev->l_next = GL(dl_rtld_map).l_next;
+ if (GL(dl_rtld_map).l_next != NULL)
+ GL(dl_rtld_map).l_next->l_prev = GL(dl_rtld_map).l_prev;
for (i = 1; i < main_map->l_searchlist.r_nlist; ++i)
if (is_rtld_link_map (main_map->l_searchlist.r_list[i]))
@@ -1941,17 +1940,17 @@ dl_main (const ElfW(Phdr) *phdr,
symbol search order because gdb uses the chain's order as its
symbol search order. */
- _dl_rtld_map.l_prev = main_map->l_searchlist.r_list[i - 1];
+ GL(dl_rtld_map).l_prev = main_map->l_searchlist.r_list[i - 1];
if (__glibc_likely (state.mode == rtld_mode_normal))
{
- _dl_rtld_map.l_next = (i + 1 < main_map->l_searchlist.r_nlist
+ GL(dl_rtld_map).l_next = (i + 1 < main_map->l_searchlist.r_nlist
? main_map->l_searchlist.r_list[i + 1]
: NULL);
#ifdef NEED_DL_SYSINFO_DSO
if (GLRO(dl_sysinfo_map) != NULL
- && _dl_rtld_map.l_prev->l_next == GLRO(dl_sysinfo_map)
- && _dl_rtld_map.l_next != GLRO(dl_sysinfo_map))
- _dl_rtld_map.l_prev = GLRO(dl_sysinfo_map);
+ && GL(dl_rtld_map).l_prev->l_next == GLRO(dl_sysinfo_map)
+ && GL(dl_rtld_map).l_next != GLRO(dl_sysinfo_map))
+ GL(dl_rtld_map).l_prev = GLRO(dl_sysinfo_map);
#endif
}
else
@@ -1960,14 +1959,14 @@ dl_main (const ElfW(Phdr) *phdr,
In this case it doesn't matter much where we put the
interpreter object, so we just initialize the list pointer so
that the assertion below holds. */
- _dl_rtld_map.l_next = _dl_rtld_map.l_prev->l_next;
+ GL(dl_rtld_map).l_next = GL(dl_rtld_map).l_prev->l_next;
- assert (_dl_rtld_map.l_prev->l_next == _dl_rtld_map.l_next);
- _dl_rtld_map.l_prev->l_next = &_dl_rtld_map;
- if (_dl_rtld_map.l_next != NULL)
+ assert (GL(dl_rtld_map).l_prev->l_next == GL(dl_rtld_map).l_next);
+ GL(dl_rtld_map).l_prev->l_next = &GL(dl_rtld_map);
+ if (GL(dl_rtld_map).l_next != NULL)
{
- assert (_dl_rtld_map.l_next->l_prev == _dl_rtld_map.l_prev);
- _dl_rtld_map.l_next->l_prev = &_dl_rtld_map;
+ assert (GL(dl_rtld_map).l_next->l_prev == GL(dl_rtld_map).l_prev);
+ GL(dl_rtld_map).l_next->l_prev = &GL(dl_rtld_map);
}
/* Now let us see whether all libraries are available in the
@@ -2109,7 +2108,7 @@ dl_main (const ElfW(Phdr) *phdr,
while (i-- > 0)
{
struct link_map *l = main_map->l_initfini[i];
- if (l != &_dl_rtld_map && ! l->l_faked)
+ if (l != &GL(dl_rtld_map) && ! l->l_faked)
{
args.l = l;
_dl_receive_error (print_unresolved, relocate_doit,
@@ -2308,7 +2307,7 @@ dl_main (const ElfW(Phdr) *phdr,
{
RTLD_TIMING_VAR (start);
rtld_timer_start (&start);
- _dl_relocate_object_no_relro (&_dl_rtld_map, main_map->l_scope, 0, 0);
+ _dl_relocate_object_no_relro (&GL(dl_rtld_map), main_map->l_scope, 0, 0);
rtld_timer_accum (&relocate_time, start);
__rtld_mutex_init ();
@@ -2319,7 +2318,7 @@ dl_main (const ElfW(Phdr) *phdr,
}
/* All ld.so initialization is complete. Apply RELRO. */
- _dl_protect_relro (&_dl_rtld_map);
+ _dl_protect_relro (&GL(dl_rtld_map));
/* Relocation is complete. Perform early libc initialization. This
is the initial libc, even if audit modules have been loaded with
diff --git a/elf/setup-vdso.h b/elf/setup-vdso.h
index 935d9e3baf..8347e3355b 100644
--- a/elf/setup-vdso.h
+++ b/elf/setup-vdso.h
@@ -92,8 +92,8 @@ setup_vdso (struct link_map *main_map __attribute__ ((unused)),
/* Rearrange the list so this DSO appears after rtld_map. */
assert (l->l_next == NULL);
assert (l->l_prev == main_map);
- _dl_rtld_map.l_next = l;
- l->l_prev = &_dl_rtld_map;
+ GL(dl_rtld_map).l_next = l;
+ l->l_prev = &GL(dl_rtld_map);
*first_preload = &l->l_next;
# else
GL(dl_nns) = 1;
diff --git a/gmon/tst-sprofil.c b/gmon/tst-sprofil.c
index 55cf99abf4..a4827cbc03 100644
--- a/gmon/tst-sprofil.c
+++ b/gmon/tst-sprofil.c
@@ -34,6 +34,7 @@ size_t taddr[] =
0x00010000 /* Linux elf32/sparc */
#if __WORDSIZE > 32
,
+ 0x4000000000000000, /* Linux elf64/ia64 */
0x0000000120000000, /* Linux elf64/alpha */
0x4000000000001000, /* elf64/hppa */
0x0000000100000000 /* Linux elf64/sparc */
diff --git a/manual/install.texi b/manual/install.texi
index 8f8f0a17b9..861d0371e5 100644
--- a/manual/install.texi
+++ b/manual/install.texi
@@ -680,6 +680,8 @@ patches, although we try to avoid this.
If you are installing @theglibc{} on @gnulinuxsystems{}, you need to have
the header files from a 3.2 or newer kernel around for reference.
+(For the ia64 architecture, you need version 3.2.18 or newer because this
+is the first version with support for the @code{accept4} system call.)
These headers must be installed using @samp{make headers_install}; the
headers present in the kernel source directory are not suitable for
direct use by @theglibc{}. You do not need to use that kernel, just have
diff --git a/manual/math.texi b/manual/math.texi
index fbabf37b48..74ee6239a0 100644
--- a/manual/math.texi
+++ b/manual/math.texi
@@ -69,7 +69,7 @@ Support for @code{_Float@var{N}} or @code{_Float@var{N}x} types is
provided for @code{_Float32}, @code{_Float64} and @code{_Float32x} on
all platforms.
It is also provided for @code{_Float128} and @code{_Float64x} on
-powerpc64le (PowerPC 64-bits little-endian), x86_64, x86,
+powerpc64le (PowerPC 64-bits little-endian), x86_64, x86, ia64,
aarch64, alpha, loongarch, mips64, riscv, s390 and sparc.
@menu
diff --git a/math/libm-test-support.h b/math/libm-test-support.h
index c1c25786ce..7c90444ff5 100644
--- a/math/libm-test-support.h
+++ b/math/libm-test-support.h
@@ -128,7 +128,7 @@ extern const char doc[];
/* On some architectures, glibc can be built with compilers that do
not have suitable built-in functions for setting the payload of a
_Float128 NaN. */
-#if ((defined __x86_64__ || defined __i386__) \
+#if ((defined __x86_64__ || defined __i386__ || defined __ia64__) \
&& !__GNUC_PREREQ (7, 0))
# define XFAIL_FLOAT128_PAYLOAD (TEST_COND_binary128 ? XFAIL_TEST : 0)
#else
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index 800ca89720..384943c73c 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -179,7 +179,9 @@ __attribute ((always_inline))
guard_position (void *mem, size_t size, size_t guardsize, const struct pthread *pd,
size_t pagesize_m1)
{
-#if _STACK_GROWS_DOWN
+#ifdef NEED_SEPARATE_REGISTER_STACK
+ return mem + (((size - guardsize) / 2) & ~pagesize_m1);
+#elif _STACK_GROWS_DOWN
return mem;
#elif _STACK_GROWS_UP
return (char *) (((uintptr_t) pd - guardsize) & ~pagesize_m1);
@@ -218,7 +220,7 @@ setup_stack_prot (char *mem, size_t size, struct pthread *pd,
{
const int prot = stack_prot ();
char *guardend = guard + guardsize;
-#if _STACK_GROWS_DOWN
+#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK)
/* As defined at guard_position, for architectures with downward stack
the guard page is always at start of the allocated area. */
if (__mprotect (guardend, size - guardsize, prot) != 0)
@@ -285,7 +287,7 @@ adjust_stack_prot (char *mem, size_t size, const struct pthread *pd,
if (pd->stack_mode == ALLOCATE_GUARD_MADV_GUARD)
{
void *slack =
-#if _STACK_GROWS_DOWN
+#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK)
mem + guardsize;
#else
guard_position (mem, size, pd->guardsize, pd, pagesize_m1);
@@ -295,7 +297,7 @@ adjust_stack_prot (char *mem, size_t size, const struct pthread *pd,
else if (pd->stack_mode == ALLOCATE_GUARD_PROT_NONE)
{
const int prot = stack_prot ();
-#if _STACK_GROWS_DOWN
+#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK)
return __mprotect (mem + guardsize, slacksize, prot) == 0;
#else
char *new_guard = (char *)(((uintptr_t) pd - guardsize)
@@ -320,7 +322,7 @@ advise_stack_range (void *mem, size_t size, uintptr_t pd, size_t guardsize)
{
uintptr_t sp = (uintptr_t) CURRENT_STACK_FRAME;
size_t pagesize_m1 = __getpagesize () - 1;
-#if _STACK_GROWS_DOWN
+#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK)
size_t freesize = (sp - (uintptr_t) mem) & ~pagesize_m1;
assert (freesize < size);
if (freesize > PTHREAD_STACK_MIN)
@@ -655,7 +657,7 @@ name_stack_maps (struct pthread *pd, bool set)
{
size_t adjust = pd->stack_mode == ALLOCATE_GUARD_PROT_NONE ?
pd->guardsize : 0;
-#if _STACK_GROWS_DOWN
+#if _STACK_GROWS_DOWN && !defined(NEED_SEPARATE_REGISTER_STACK)
void *stack = pd->stackblock + adjust;
#else
void *stack = pd->stackblock;
diff --git a/nptl/perf.c b/nptl/perf.c
index 5defd622ca..4ebbf87954 100644
--- a/nptl/perf.c
+++ b/nptl/perf.c
@@ -708,6 +708,8 @@ clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
({ unsigned int _hi, _lo; \
asm volatile ("rdtsc" : "=a" (_lo), "=d" (_hi)); \
(Var) = ((unsigned long long int) _hi << 32) | _lo; })
+#elif defined __ia64__
+#define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
#else
#error "HP_TIMING_NOW missing"
#endif
diff --git a/scripts/build-many-glibcs.py b/scripts/build-many-glibcs.py
index 36b1044c28..c659f54e2a 100755
--- a/scripts/build-many-glibcs.py
+++ b/scripts/build-many-glibcs.py
@@ -232,6 +232,10 @@ class Context(object):
os_name='linux-gnu')
self.add_config(arch='i686',
os_name='gnu')
+ self.add_config(arch='ia64',
+ os_name='linux-gnu',
+ first_gcc_cfg=['--with-system-libunwind'],
+ binutils_cfg=['--enable-obsolete'])
self.add_config(arch='loongarch64',
os_name='linux-gnu',
variant='lp64d',
@@ -1323,6 +1327,7 @@ def install_linux_headers(policy, cmdlist):
'i586': 'x86',
'i686': 'x86',
'i786': 'x86',
+ 'ia64': 'ia64',
'loongarch64': 'loongarch',
'm68k': 'm68k',
'microblaze': 'microblaze',
diff --git a/support/xclone.c b/support/xclone.c
index 0bfd10ecf6..0239bbf0d3 100644
--- a/support/xclone.c
+++ b/support/xclone.c
@@ -27,11 +27,18 @@ xclone (int (*fn) (void *arg), void *arg, void *stack, size_t stack_size,
{
pid_t r = -1;
-# if _STACK_GROWS_DOWN
+# ifdef __ia64__
+ extern int __clone2 (int (*fn) (void *arg), void *stack, size_t stack_size,
+ int flags, void *arg, ...);
+ r = __clone2 (fn, stack, stack_size, flags, arg, /* ptid */ NULL,
+ /* tls */ NULL, /* ctid */ NULL);
+# else
+# if _STACK_GROWS_DOWN
r = clone (fn, stack + stack_size, flags, arg, /* ptid */ NULL,
/* tls */ NULL, /* ctid */ NULL);
-# elif _STACK_GROWS_UP
+# elif _STACK_GROWS_UP
r = clone (fn, stack, flags, arg, /* ptid */ NULL, /* tls */ NULL, NULL);
+# endif
# endif
if (r < 0)
diff --git a/sysdeps/generic/dl-fptr.h b/sysdeps/generic/dl-fptr.h
new file mode 100644
index 0000000000..71cfb62411
--- /dev/null
+++ b/sysdeps/generic/dl-fptr.h
@@ -0,0 +1,45 @@
+/* Function descriptors. Generic version.
+ Copyright (C) 1995-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef dl_fptr_h
+#define dl_fptr_h 1
+
+/* An FDESC is a function descriptor. */
+
+struct fdesc
+ {
+ ElfW(Addr) ip; /* code entry point */
+ ElfW(Addr) gp; /* global pointer */
+ };
+
+struct fdesc_table
+ {
+ struct fdesc_table *next;
+ unsigned int len; /* # of entries in fdesc table */
+ volatile unsigned int first_unused; /* index of first available entry */
+ struct fdesc fdesc[0];
+ };
+
+struct link_map;
+
+extern ElfW(Addr) _dl_boot_fptr_table [];
+
+extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *,
+ ElfW(Addr));
+
+#endif /* !dl_fptr_h */
diff --git a/sysdeps/generic/ldconfig.h b/sysdeps/generic/ldconfig.h
index 7bc8788647..5913e02f0d 100644
--- a/sysdeps/generic/ldconfig.h
+++ b/sysdeps/generic/ldconfig.h
@@ -30,6 +30,7 @@
#define FLAG_ELF_LIBC6 0x0003
#define FLAG_REQUIRED_MASK 0xff00
#define FLAG_SPARC_LIB64 0x0100
+#define FLAG_IA64_LIB64 0x0200
#define FLAG_X8664_LIB64 0x0300
#define FLAG_S390_LIB64 0x0400
#define FLAG_POWERPC_LIB64 0x0500
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 74025f1f93..16a48b57ed 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -408,6 +408,14 @@ struct rtld_global
/* List of search directories. */
EXTERN struct r_search_path_elem *_dl_all_dirs;
+ /* Structure describing the dynamic linker itself. */
+ EXTERN struct link_map _dl_rtld_map;
+#ifdef SHARED
+ /* Used to store the audit information for the link map of the
+ dynamic loader. */
+ struct auditstate _dl_rtld_auditstate[DL_NNS];
+#endif
+
/* Get architecture specific definitions. */
#define PROCINFO_DECL
#ifndef PROCINFO_CLASS
@@ -1369,18 +1377,11 @@ rtld_active (void)
return GLRO(dl_init_all_dirs) != NULL;
}
-/* Pre-allocated link map for the dynamic linker itself. */
-extern struct link_map _dl_rtld_map attribute_hidden;
-
-/* Used to store the audit information for the link map of the
- dynamic loader. */
-extern struct auditstate _dl_rtld_auditstate[DL_NNS] attribute_hidden;
-
/* Returns true of L is the link map of the dynamic linker itself. */
static inline bool
is_rtld_link_map (const struct link_map *l)
{
- return l == &_dl_rtld_map;
+ return l == &GL(dl_rtld_map);
}
static inline struct auditstate *
@@ -1388,7 +1389,7 @@ link_map_audit_state (struct link_map *l, size_t index)
{
if (is_rtld_link_map (l))
/* The auditstate array is stored separately. */
- return _dl_rtld_auditstate + index;
+ return &GL (dl_rtld_auditstate) [index];
else
{
/* The auditstate array follows the link map in memory. */
diff --git a/sysdeps/generic/unwind-dw2.c b/sysdeps/generic/unwind-dw2.c
index f0ed7b3a96..f3ac24b017 100644
--- a/sysdeps/generic/unwind-dw2.c
+++ b/sysdeps/generic/unwind-dw2.c
@@ -224,6 +224,7 @@ _Unwind_FindEnclosingFunction (void *pc)
return NULL;
}
+#ifndef __ia64__
_Unwind_Ptr
_Unwind_GetDataRelBase (struct _Unwind_Context *context)
{
@@ -235,6 +236,7 @@ _Unwind_GetTextRelBase (struct _Unwind_Context *context)
{
return (_Unwind_Ptr) context->bases.tbase;
}
+#endif
/* Extract any interesting information from the CIE for the translation
unit F belongs to. Return a pointer to the byte after the augmentation,
diff --git a/sysdeps/generic/unwind.h b/sysdeps/generic/unwind.h
index 4f159a1726..79e67082b6 100644
--- a/sysdeps/generic/unwind.h
+++ b/sysdeps/generic/unwind.h
@@ -33,7 +33,11 @@ extern "C" {
inefficient for 32-bit and smaller machines. */
typedef unsigned _Unwind_Word __attribute__((__mode__(__unwind_word__)));
typedef signed _Unwind_Sword __attribute__((__mode__(__unwind_word__)));
+#if defined(__ia64__) && defined(__hpux__)
+typedef unsigned _Unwind_Ptr __attribute__((__mode__(__word__)));
+#else
typedef unsigned _Unwind_Ptr __attribute__((__mode__(__pointer__)));
+#endif
typedef unsigned _Unwind_Internal_Ptr __attribute__((__mode__(__pointer__)));
/* @@@ The IA-64 ABI uses a 64-bit word to identify the producer and
@@ -186,8 +190,29 @@ extern void _Unwind_SjLj_Resume (struct _Unwind_Exception *);
and data-relative addressing in the LDSA. In order to stay link
compatible with the standard ABI for IA-64, we inline these. */
+#ifdef __ia64__
+#include <stdlib.h>	/* For abort, used by _Unwind_GetTextRelBase below.  */
+
+static inline _Unwind_Ptr
+_Unwind_GetDataRelBase (struct _Unwind_Context *_C)
+{
+ /* The GP is stored in R1. */
+ return _Unwind_GetGR (_C, 1);
+}
+
+static inline _Unwind_Ptr
+_Unwind_GetTextRelBase (struct _Unwind_Context *_C)
+{
+ abort ();
+ return 0;
+}
+
+/* @@@ Retrieve the Backing Store Pointer of the given context. */
+extern _Unwind_Word _Unwind_GetBSP (struct _Unwind_Context *);
+#else
extern _Unwind_Ptr _Unwind_GetDataRelBase (struct _Unwind_Context *);
extern _Unwind_Ptr _Unwind_GetTextRelBase (struct _Unwind_Context *);
+#endif
/* @@@ Given an address, return the entry point of the function that
contains it. */
diff --git a/sysdeps/hppa/dl-fptr.h b/sysdeps/hppa/dl-fptr.h
index 5f16c5b2b8..4d1b9dae0c 100644
--- a/sysdeps/hppa/dl-fptr.h
+++ b/sysdeps/hppa/dl-fptr.h
@@ -19,28 +19,7 @@
#ifndef dl_hppa_fptr_h
#define dl_hppa_fptr_h 1
-/* An FDESC is a function descriptor. */
-
-struct fdesc
- {
- ElfW(Addr) ip; /* code entry point */
- ElfW(Addr) gp; /* global pointer */
- };
-
-struct fdesc_table
- {
- struct fdesc_table *next;
- unsigned int len; /* # of entries in fdesc table */
- volatile unsigned int first_unused; /* index of first available entry */
- struct fdesc fdesc[0];
- };
-
-struct link_map;
-
-extern ElfW(Addr) _dl_boot_fptr_table [];
-
-extern ElfW(Addr) _dl_make_fptr (struct link_map *, const ElfW(Sym) *,
- ElfW(Addr));
+#include <sysdeps/generic/dl-fptr.h>
/* Initialize function pointer code. Call before relocation processing. */
extern void _dl_fptr_init (void);
diff --git a/sysdeps/ia64/Implies b/sysdeps/ia64/Implies
new file mode 100644
index 0000000000..22e4836e74
--- /dev/null
+++ b/sysdeps/ia64/Implies
@@ -0,0 +1,6 @@
+wordsize-64
+# ia64 uses IEEE 754 floating point.
+ieee754/float128
+ieee754/ldbl-96
+ieee754/dbl-64
+ieee754/flt-32
diff --git a/sysdeps/ia64/Makeconfig b/sysdeps/ia64/Makeconfig
new file mode 100644
index 0000000000..2f1abe868c
--- /dev/null
+++ b/sysdeps/ia64/Makeconfig
@@ -0,0 +1,4 @@
+# ia64 does not provide crtbeginT.o, so use crtbegin.o.
++prectorT = $(+prector)
+
+float64x-alias-fcts = yes
diff --git a/sysdeps/ia64/Makefile b/sysdeps/ia64/Makefile
new file mode 100644
index 0000000000..a4305d524d
--- /dev/null
+++ b/sysdeps/ia64/Makefile
@@ -0,0 +1,25 @@
+# The ia64 `long double' is a distinct type we support.
+long-double-fcts = yes
+
+ifeq ($(subdir),math)
+# sqrtf128 requires soft-fp.
+CPPFLAGS += -I../soft-fp
+endif
+
+ifeq ($(subdir),gmon)
+sysdep_routines += _mcount
+endif
+
+ifeq ($(subdir), csu)
+CPPFLAGS-start.S = -D__ASSEMBLY__
+
+ifeq (yes,$(build-shared))
+# Compatibility
+sysdep_routines += ia64libgcc
+shared-only-routines += ia64libgcc
+endif
+endif
+
+ifeq ($(subdir),elf)
+sysdep-dl-routines += dl-symaddr dl-fptr
+endif
diff --git a/sysdeps/ia64/Versions b/sysdeps/ia64/Versions
new file mode 100644
index 0000000000..ac3bff9b78
--- /dev/null
+++ b/sysdeps/ia64/Versions
@@ -0,0 +1,21 @@
+ld {
+ GLIBC_PRIVATE {
+ # ia64 specific functions in the dynamic linker, but used by libc.so.
+ _dl_symbol_address; _dl_lookup_address;
+ _dl_function_address;
+ }
+}
+libc {
+ GLIBC_2.2 {
+ # Functions from libgcc.
+ __divtf3; __divdf3; __divsf3; __divdi3; __moddi3; __udivdi3; __umoddi3;
+ __multi3;
+ }
+}
+libm {
+ GLIBC_2.1 {
+ # A generic bug got this omitted from other configurations' version
+ # sets, but we always had it.
+ exp2l;
+ }
+}
diff --git a/sysdeps/ia64/_mcount.S b/sysdeps/ia64/_mcount.S
new file mode 100644
index 0000000000..21e2949b7e
--- /dev/null
+++ b/sysdeps/ia64/_mcount.S
@@ -0,0 +1,90 @@
+/* Machine-specific calling sequence for `mcount' profiling function. ia64
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+/* Assembly stub to invoke _mcount(). Compiler generated code calls
+ this stub before executing a function's prologue and without saving
+ any registers. It is therefore necessary to preserve the input
+ registers as they may contain function arguments. To work
+ correctly with frame-less functions, it is also necessary to
+ preserve the return pointer (b0 aka rp).
+
+ State upon entering _mcount:
+
+ r8 address of return value structure (used only when called
+ function returns a large structure)
+ r15 static link (used only for nested functions)
+ in0 ar.pfs to restore before returning to the function that
+ called _mcount
+ in1 gp value to restore before returning to the function that
+ called _mcount
+ in2 return address in the function that invoked the caller
+ of _mcount (frompc)
+ in3 address of the global-offset table entry that holds the
+ profile count dword allocated by the compiler; to get
+	the address of this dword, use "ld8 in3=[in3]"; this
+ dword can be used in any way by _mcount (including
+ not at all, as is the case with the current implementation)
+ b0 address to return to after _mcount is done
+*/
+
+#include <sysdep.h>
+
+#undef ret
+
+LEAF(_mcount)
+ .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4)
+ alloc loc1 = ar.pfs, 4, 4, 3, 0
+ mov loc0 = rp
+ .body
+ mov loc2 = r8 // gcc uses r8 to pass pointer to return structure
+ ;;
+ mov loc3 = r15 // gcc uses r15 to pass the static link to nested functions
+ mov out0 = in2
+ mov out1 = rp
+ br.call.sptk.few rp = __mcount
+ ;;
+.here:
+{
+ .mii
+ mov gp = in1
+ mov r2 = ip
+ mov ar.pfs = loc1
+}
+ ;;
+ adds r2 = _mcount_ret_helper - .here, r2
+ mov b7 = loc0
+ mov rp = in2
+ ;;
+ mov r3 = in0
+ mov r8 = loc2
+ mov r15 = loc3
+ mov b6 = r2
+ br.ret.sptk.few b6
+END(_mcount)
+
+LOCAL_LEAF(_mcount_ret_helper)
+ .prologue
+ .altrp b7
+ .save ar.pfs, r3
+ .body
+ alloc r2 = ar.pfs, 0, 0, 8, 0
+ mov ar.pfs = r3
+ br b7
+END(_mcount_ret_helper)
+
+weak_alias (_mcount, mcount)
diff --git a/sysdeps/ia64/abort-instr.h b/sysdeps/ia64/abort-instr.h
new file mode 100644
index 0000000000..8f26f0fc2a
--- /dev/null
+++ b/sysdeps/ia64/abort-instr.h
@@ -0,0 +1,3 @@
+/* An instruction which should crash any program is `break 0' which triggers
+ SIGILL. */
+#define ABORT_INSTRUCTION asm ("break 0")
diff --git a/sysdeps/ia64/atomic-machine.h b/sysdeps/ia64/atomic-machine.h
new file mode 100644
index 0000000000..0eb760a5e9
--- /dev/null
+++ b/sysdeps/ia64/atomic-machine.h
@@ -0,0 +1,96 @@
+/* Copyright (C) 2003-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <ia64intrin.h>
+
+#define __HAVE_64B_ATOMICS 1
+#define USE_ATOMIC_COMPILER_BUILTINS 0
+
+/* XXX Is this actually correct? */
+#define ATOMIC_EXCHANGE_USES_CAS 0
+
+
+#define __arch_compare_and_exchange_bool_8_acq(mem, newval, oldval) \
+ (abort (), 0)
+
+#define __arch_compare_and_exchange_bool_16_acq(mem, newval, oldval) \
+ (abort (), 0)
+
+#define __arch_compare_and_exchange_bool_32_acq(mem, newval, oldval) \
+ (!__sync_bool_compare_and_swap ((mem), (int) (long) (oldval), \
+ (int) (long) (newval)))
+
+#define __arch_compare_and_exchange_bool_64_acq(mem, newval, oldval) \
+ (!__sync_bool_compare_and_swap ((mem), (long) (oldval), \
+ (long) (newval)))
+
+#define __arch_compare_and_exchange_val_8_acq(mem, newval, oldval) \
+ (abort (), (__typeof (*mem)) 0)
+
+#define __arch_compare_and_exchange_val_16_acq(mem, newval, oldval) \
+ (abort (), (__typeof (*mem)) 0)
+
+#define __arch_compare_and_exchange_val_32_acq(mem, newval, oldval) \
+ __sync_val_compare_and_swap ((mem), (int) (long) (oldval), \
+ (int) (long) (newval))
+
+#define __arch_compare_and_exchange_val_64_acq(mem, newval, oldval) \
+ __sync_val_compare_and_swap ((mem), (long) (oldval), (long) (newval))
+
+/* Atomically store newval and return the old value. */
+#define atomic_exchange_acq(mem, value) \
+ __sync_lock_test_and_set (mem, value)
+
+#define atomic_exchange_rel(mem, value) \
+ (__sync_synchronize (), __sync_lock_test_and_set (mem, value))
+
+#define atomic_exchange_and_add(mem, value) \
+ __sync_fetch_and_add ((mem), (value))
+
+#define atomic_decrement_if_positive(mem) \
+ ({ __typeof (*mem) __oldval, __val; \
+ __typeof (mem) __memp = (mem); \
+ \
+ __val = (*__memp); \
+ do \
+ { \
+ __oldval = __val; \
+ if (__builtin_expect (__val <= 0, 0)) \
+ break; \
+ __val = atomic_compare_and_exchange_val_acq (__memp, __oldval - 1, \
+ __oldval); \
+ } \
+ while (__builtin_expect (__val != __oldval, 0)); \
+ __oldval; })
+
+#define atomic_bit_test_set(mem, bit) \
+ ({ __typeof (*mem) __oldval, __val; \
+ __typeof (mem) __memp = (mem); \
+ __typeof (*mem) __mask = ((__typeof (*mem)) 1 << (bit)); \
+ \
+ __val = (*__memp); \
+ do \
+ { \
+ __oldval = __val; \
+ __val = atomic_compare_and_exchange_val_acq (__memp, \
+ __oldval | __mask, \
+ __oldval); \
+ } \
+ while (__builtin_expect (__val != __oldval, 0)); \
+ __oldval & __mask; })
+
+#define atomic_full_barrier() __sync_synchronize ()
diff --git a/sysdeps/ia64/bits/endianness.h b/sysdeps/ia64/bits/endianness.h
new file mode 100644
index 0000000000..70c211e569
--- /dev/null
+++ b/sysdeps/ia64/bits/endianness.h
@@ -0,0 +1,11 @@
+#ifndef _BITS_ENDIANNESS_H
+#define _BITS_ENDIANNESS_H 1
+
+#ifndef _BITS_ENDIAN_H
+# error "Never use <bits/endianness.h> directly; include <endian.h> instead."
+#endif
+
+/* IA64 is little-endian. */
+#define __BYTE_ORDER __LITTLE_ENDIAN
+
+#endif /* bits/endianness.h */
diff --git a/sysdeps/ia64/bits/fenv.h b/sysdeps/ia64/bits/fenv.h
new file mode 100644
index 0000000000..60163413b7
--- /dev/null
+++ b/sysdeps/ia64/bits/fenv.h
@@ -0,0 +1,104 @@
+/* Copyright (C) 1999-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _FENV_H
+# error "Never use <bits/fenv.h> directly; include <fenv.h> instead."
+#endif
+
+
+/* Define bits representing the exception. We use the bit positions of
+ the appropriate bits in the FPSR... (Tahoe EAS 2.4 5-4)*/
+
+enum
+ {
+ FE_INEXACT =
+#define FE_INEXACT (1 << 5)
+ FE_INEXACT,
+
+ FE_UNDERFLOW =
+#define FE_UNDERFLOW (1 << 4)
+ FE_UNDERFLOW,
+
+ FE_OVERFLOW =
+#define FE_OVERFLOW (1 << 3)
+ FE_OVERFLOW,
+
+ FE_DIVBYZERO =
+#define FE_DIVBYZERO (1 << 2)
+ FE_DIVBYZERO,
+
+ FE_UNNORMAL =
+#define FE_UNNORMAL (1 << 1)
+ FE_UNNORMAL,
+
+ FE_INVALID =
+#define FE_INVALID (1 << 0)
+ FE_INVALID,
+
+ FE_ALL_EXCEPT =
+#define FE_ALL_EXCEPT (FE_INEXACT | FE_UNDERFLOW | FE_OVERFLOW | FE_DIVBYZERO | FE_UNNORMAL | FE_INVALID)
+ FE_ALL_EXCEPT
+ };
+
+
+enum
+ {
+ FE_TOWARDZERO =
+#define FE_TOWARDZERO 3
+ FE_TOWARDZERO,
+
+ FE_UPWARD =
+#define FE_UPWARD 2
+ FE_UPWARD,
+
+ FE_DOWNWARD =
+#define FE_DOWNWARD 1
+ FE_DOWNWARD,
+
+ FE_TONEAREST =
+#define FE_TONEAREST 0
+ FE_TONEAREST,
+ };
+
+
+/* Type representing exception flags. */
+typedef unsigned long int fexcept_t;
+
+/* Type representing floating-point environment. */
+typedef unsigned long int fenv_t;
+
+/* If the default argument is used we use this value. */
+#define FE_DFL_ENV ((const fenv_t *) 0xc009804c0270033fUL)
+
+#ifdef __USE_GNU
+/* Floating-point environment where only FE_UNNORMAL is masked since this
+ exception is not generally supported by glibc. */
+# define FE_NOMASK_ENV ((const fenv_t *) 0xc009804c02700302UL)
+
+/* Floating-point environment with (processor-dependent) non-IEEE
+ floating point. In this case, turning on flush-to-zero mode for
+ s0, s2, and s3. */
+# define FE_NONIEEE_ENV ((const fenv_t *) 0xc009a04d0270037fUL)
+#endif
+
+#if __GLIBC_USE (IEC_60559_BFP_EXT_C23)
+/* Type representing floating-point control modes. */
+typedef unsigned long int femode_t;
+
+/* Default floating-point control modes. */
+# define FE_DFL_MODE ((const femode_t *) 0xc009804c0270033fUL)
+#endif
diff --git a/sysdeps/ia64/bits/floatn.h b/sysdeps/ia64/bits/floatn.h
new file mode 100644
index 0000000000..297c4d2c6a
--- /dev/null
+++ b/sysdeps/ia64/bits/floatn.h
@@ -0,0 +1,119 @@
+/* Macros to control TS 18661-3 glibc features on ia64.
+ Copyright (C) 2017-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _BITS_FLOATN_H
+#define _BITS_FLOATN_H
+
+#include <features.h>
+
+/* Defined to 1 if the current compiler invocation provides a
+ floating-point type with the IEEE 754 binary128 format, and this
+ glibc includes corresponding *f128 interfaces for it. The required
+ libgcc support was added some time after the basic compiler
+ support. */
+#if __GNUC_PREREQ (4, 4)
+# define __HAVE_FLOAT128 1
+#else
+# define __HAVE_FLOAT128 0
+#endif
+
+/* Defined to 1 if __HAVE_FLOAT128 is 1 and the type is ABI-distinct
+ from the default float, double and long double types in this glibc. */
+#if __HAVE_FLOAT128
+# define __HAVE_DISTINCT_FLOAT128 1
+#else
+# define __HAVE_DISTINCT_FLOAT128 0
+#endif
+
+/* Defined to 1 if the current compiler invocation provides a
+ floating-point type with the right format for _Float64x, and this
+ glibc includes corresponding *f64x interfaces for it. */
+#define __HAVE_FLOAT64X 1
+
+/* Defined to 1 if __HAVE_FLOAT64X is 1 and _Float64x has the format
+ of long double. Otherwise, if __HAVE_FLOAT64X is 1, _Float64x has
+ the format of _Float128, which must be different from that of long
+ double. */
+#define __HAVE_FLOAT64X_LONG_DOUBLE 1
+
+#ifndef __ASSEMBLER__
+
+/* Defined to concatenate the literal suffix to be used with _Float128
+ types, if __HAVE_FLOAT128 is 1. */
+# if __HAVE_FLOAT128
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
+/* The literal suffix f128 exists only since GCC 7.0. */
+# define __f128(x) x##q
+# else
+# define __f128(x) x##f128
+# endif
+# endif
+
+/* Defined to a complex binary128 type if __HAVE_FLOAT128 is 1. */
+# if __HAVE_FLOAT128
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
+/* Add a typedef for older GCC compilers which don't natively support
+ _Complex _Float128. */
+typedef _Complex float __cfloat128 __attribute__ ((__mode__ (__TC__)));
+# define __CFLOAT128 __cfloat128
+# else
+# define __CFLOAT128 _Complex _Float128
+# endif
+# endif
+
+/* The remainder of this file provides support for older compilers.  */
+# if __HAVE_FLOAT128
+
+/* The type _Float128 exists only since GCC 7.0. */
+# if !__GNUC_PREREQ (7, 0) || (defined __cplusplus && !__GNUC_PREREQ (13, 0))
+typedef __float128 _Float128;
+# endif
+
+/* __builtin_huge_valf128 doesn't exist before GCC 7.0. */
+# if !__GNUC_PREREQ (7, 0)
+# define __builtin_huge_valf128() ((_Float128) __builtin_huge_val ())
+# endif
+
+/* Older GCC has only a subset of built-in functions for _Float128 on
+ ia64, and __builtin_infq is not usable in static initializers.
+ Converting a narrower sNaN to _Float128 produces a quiet NaN, so
+ attempts to use _Float128 sNaNs will not work properly with older
+ compilers. */
+# if !__GNUC_PREREQ (7, 0)
+# define __builtin_copysignf128 __builtin_copysignq
+# define __builtin_fabsf128 __builtin_fabsq
+# define __builtin_inff128() ((_Float128) __builtin_inf ())
+# define __builtin_nanf128(x) ((_Float128) __builtin_nan (x))
+# define __builtin_nansf128(x) ((_Float128) __builtin_nans (x))
+# endif
+
+/* In math/math.h, __MATH_TG will expand signbit to __builtin_signbit*,
+ e.g.: __builtin_signbitf128, before GCC 6. However, there has never
+ been a __builtin_signbitf128 in GCC and the type-generic builtin is
+ only available since GCC 6. */
+# if !__GNUC_PREREQ (6, 0)
+# define __builtin_signbitf128 __signbitf128
+# endif
+
+# endif
+
+#endif /* !__ASSEMBLER__. */
+
+#include <bits/floatn-common.h>
+
+#endif /* _BITS_FLOATN_H */
diff --git a/sysdeps/ia64/bits/fp-logb.h b/sysdeps/ia64/bits/fp-logb.h
new file mode 100644
index 0000000000..4c40c9e9c2
--- /dev/null
+++ b/sysdeps/ia64/bits/fp-logb.h
@@ -0,0 +1,24 @@
+/* Define __FP_LOGB0_IS_MIN and __FP_LOGBNAN_IS_MIN. IA64 version.
+ Copyright (C) 2016-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef _MATH_H
+# error "Never use <bits/fp-logb.h> directly; include <math.h> instead."
+#endif
+
+#define __FP_LOGB0_IS_MIN 1
+#define __FP_LOGBNAN_IS_MIN 0
diff --git a/sysdeps/ia64/bits/link.h b/sysdeps/ia64/bits/link.h
new file mode 100644
index 0000000000..662a46c304
--- /dev/null
+++ b/sysdeps/ia64/bits/link.h
@@ -0,0 +1,62 @@
+/* Copyright (C) 2005-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _LINK_H
+# error "Never include <bits/link.h> directly; use <link.h> instead."
+#endif
+
+/* Registers for entry into PLT on ia64; handed to both audit hooks declared below. */
+typedef struct La_ia64_regs
+{
+ uint64_t lr_r8;
+ uint64_t lr_r9;
+ uint64_t lr_r10;
+ uint64_t lr_r11;
+ uint64_t lr_gr [8]; /* presumably the eight general argument registers -- confirm against dl-trampoline.S */
+ long double lr_fr [8]; /* presumably the eight FP argument registers (f8..) -- confirm */
+ uint64_t lr_unat; /* presumably ar.unat at entry -- confirm */
+ uint64_t lr_sp; /* presumably the stack pointer at entry -- confirm */
+} La_ia64_regs;
+
+/* Return values for calls from PLT on ia64; filled in for la_ia64_gnu_pltexit. */
+typedef struct La_ia64_retval
+{
+ uint64_t lrv_r8;
+ uint64_t lrv_r9;
+ uint64_t lrv_r10;
+ uint64_t lrv_r11;
+ long double lr_fr [8]; /* NOTE(review): keeps the lr_ prefix, unlike the lrv_ fields above; renaming would change the public layout names */
+} La_ia64_retval;
+
+
+__BEGIN_DECLS
+
+extern Elf64_Addr la_ia64_gnu_pltenter (Elf64_Sym *__sym, unsigned int __ndx,
+ uintptr_t *__refcook,
+ uintptr_t *__defcook,
+ La_ia64_regs *__regs, /* entry-time registers, see La_ia64_regs */
+ unsigned int *__flags,
+ const char *__symname,
+ long int *__framesizep);
+extern unsigned int la_ia64_gnu_pltexit (Elf64_Sym *__sym, unsigned int __ndx,
+ uintptr_t *__refcook,
+ uintptr_t *__defcook,
+ const La_ia64_regs *__inregs, /* read-only entry-time registers */
+ La_ia64_retval *__outregs, /* return-value registers, see La_ia64_retval */
+ const char *__symname);
+
+__END_DECLS
diff --git a/sysdeps/ia64/bzero.c b/sysdeps/ia64/bzero.c
new file mode 100644
index 0000000000..79771f3e91
--- /dev/null
+++ b/sysdeps/ia64/bzero.c
@@ -0,0 +1,3 @@
+/* ia64 does not export __bzero symbol. */
+#define __bzero bzero
+#include <string/bzero.c>
diff --git a/sysdeps/ia64/configure b/sysdeps/ia64/configure
new file mode 100644
index 0000000000..d4e4c51f4c
--- /dev/null
+++ b/sysdeps/ia64/configure
@@ -0,0 +1,9 @@
+# This file is generated from configure.ac by Autoconf. DO NOT EDIT!
+ # Local configure fragment for sysdeps/ia64.
+
+# PIE builds fail on binutils 2.37 and earlier, see:
+# https://sourceware.org/bugzilla/show_bug.cgi?id=28672
+printf "%s\n" "#define PIE_UNSUPPORTED 1" >>confdefs.h
+
+# work around problem with autoconf and empty lines at the end of files
+
diff --git a/sysdeps/ia64/configure.ac b/sysdeps/ia64/configure.ac
new file mode 100644
index 0000000000..6958c26415
--- /dev/null
+++ b/sysdeps/ia64/configure.ac
@@ -0,0 +1,7 @@
+GLIBC_PROVIDES dnl See aclocal.m4 in the top level source directory.
+# Local configure fragment for sysdeps/ia64.
+
+# PIE builds fail on binutils 2.37 and earlier, see:
+# https://sourceware.org/bugzilla/show_bug.cgi?id=28672
+AC_DEFINE(PIE_UNSUPPORTED)
+# work around problem with autoconf and empty lines at the end of files
diff --git a/sysdeps/ia64/crti.S b/sysdeps/ia64/crti.S
new file mode 100644
index 0000000000..2210b576e4
--- /dev/null
+++ b/sysdeps/ia64/crti.S
@@ -0,0 +1,162 @@
+/* Special .init and .fini section support for IA64.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* crti.S puts a function prologue at the beginning of the .init and
+ .fini sections and defines global symbols for those addresses, so
+ they can be called as functions. The symbols _init and _fini are
+ magic and cause the linker to emit DT_INIT and DT_FINI. */
+
+#include <libc-symbols.h>
+#include <sysdep.h>
+#undef ret
+
+#ifndef PREINIT_FUNCTION
+# define PREINIT_FUNCTION __gmon_start__
+#endif
+
+#ifndef PREINIT_FUNCTION_WEAK
+# define PREINIT_FUNCTION_WEAK 1
+#endif
+
+#if PREINIT_FUNCTION_WEAK
+ weak_extern (PREINIT_FUNCTION)
+#else
+ .hidden PREINIT_FUNCTION
+#endif
+
+/* If we have working .init_array support, we want to keep the .init
+ section empty (apart from the mandatory prologue/epilogue). This
+ ensures that the default unwind conventions (return-pointer in b0,
+ frame state in ar.pfs, etc.) will do the Right Thing. To ensure
+ an empty .init section, we register gmon_initializer() via the
+ .init_array.
+
+ --davidm 02/10/29 */
+
+#if PREINIT_FUNCTION_WEAK
+/* This blob of assembly code is one simple C function:
+
+static void
+__attribute__ ((used))
+gmon_initializer (void)
+{
+ extern void weak_function __gmon_start__ (void);
+
+ if (__gmon_start__)
+ (*__gmon_start__)();
+}
+ */
+ .text
+ .align 64
+ .proc gmon_initializer#
+gmon_initializer:
+ .prologue 12, 32
+ .mmi
+ .save ar.pfs, r33
+ alloc r33 = ar.pfs, 0, 3, 0, 0 /* r33 = saved ar.pfs; locals r32-r34 */
+ addl r14 = @ltoff(@fptr(PREINIT_FUNCTION#)), gp /* r14 = address of the linkage-table slot */
+ .save rp, r32
+ mov r32 = b0 /* r32 = saved return pointer */
+ .mmi
+ mov r34 = r1 /* r34 = saved r1 (gp), restored after the call */
+ .body
+ ;;
+ ld8 r14 = [r14] /* r14 = the function pointer tested by the C "if" above */
+ nop 0
+ ;;
+ .mib
+ cmp.eq p6, p7 = 0, r14 /* p6 = pointer is null */
+ nop 0
+ (p6) br.cond.spnt .L1 /* nothing to call: go straight to the epilogue */
+ ;;
+ .mib
+ nop 0
+ nop 0
+ br.call.sptk.many b0 = PREINIT_FUNCTION#
+ ;;
+ .mmi
+ mov r1 = r34 /* restore gp saved in r34 */
+ nop 0
+ nop 0
+.L1:
+ .mii
+ nop 0
+ mov ar.pfs = r33 /* restore ar.pfs saved by alloc */
+ nop 0
+ ;;
+ .mib
+ nop 0
+ mov b0 = r32 /* restore the return pointer */
+ br.ret.sptk.many b0
+ .endp gmon_initializer#
+# undef PREINIT_FUNCTION /* from here on the hook registered below is the wrapper */
+# define PREINIT_FUNCTION gmon_initializer
+#endif
+ .section .init_array, "aw"
+ data8 @fptr(PREINIT_FUNCTION) /* registered here so that .init itself stays empty (see comment above) */
+
+ .section .init,"ax",@progbits
+ .global _init#
+ .hidden _init#
+ .proc _init#
+_init:
+ .prologue
+ .save ar.pfs, r34
+ alloc r34 = ar.pfs, 0, 3, 0, 0 /* r34 = saved ar.pfs; locals r32-r34 */
+ .vframe r32
+ mov r32 = r12 /* r32 = incoming r12 (sp); copied back by crtn.S */
+ .save rp, r33
+ mov r33 = b0 /* r33 = return pointer; copied back by crtn.S */
+ .body
+ adds r12 = -16, r12 /* reserve 16 bytes below the incoming sp */
+ ;; /* see gmon_initializer() above */
+ .endp _init#
+
+ .section .fini,"ax",@progbits
+ .global _fini#
+ .hidden _fini#
+ .proc _fini#
+_fini:
+ .prologue
+ .save ar.pfs, r34
+ alloc r34 = ar.pfs, 0, 3, 0, 0 /* r34 = saved ar.pfs; locals r32-r34 */
+ .vframe r32
+ mov r32 = r12 /* r32 = incoming r12 (sp); copied back by crtn.S */
+ .save rp, r33
+ mov r33 = b0 /* r33 = return pointer; copied back by crtn.S */
+ .body
+ adds r12 = -16, r12 /* reserve 16 bytes below the incoming sp */
+ ;; /* body ends here; the matching epilogue lives in crtn.S */
+ .endp _fini#
diff --git a/sysdeps/ia64/crtn.S b/sysdeps/ia64/crtn.S
new file mode 100644
index 0000000000..a4a8a685c1
--- /dev/null
+++ b/sysdeps/ia64/crtn.S
@@ -0,0 +1,69 @@
+/* Special .init and .fini section support for IA64.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ In addition to the permissions in the GNU Lesser General Public
+ License, the Free Software Foundation gives you unlimited
+ permission to link the compiled version of this file with other
+ programs, and to distribute those programs without any restriction
+ coming from the use of this file. (The GNU Lesser General Public
+ License restrictions do apply in other respects; for example, they
+ cover modification of the file, and distribution when not linked
+ into another program.)
+
+ Note that people who make modified versions of this file are not
+ obligated to grant this special exception for their modified
+ versions; it is their choice whether to do so. The GNU Lesser
+ General Public License gives permission to release a modified
+ version without this exception; this exception also makes it
+ possible to release a modified version which carries forward this
+ exception.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library. If not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#undef ret
+
+/* crtn.S puts function epilogues in the .init and .fini sections
+ corresponding to the prologues in crti.S. */
+
+ .section .init,"ax",@progbits
+ .proc _init#
+_init:
+ .prologue /* annotations only: this region contains no instructions here */
+ .save ar.pfs, r34
+ .vframe r32
+ .save rp, r33
+ .body
+ .regstk 0,2,0,0 /* NOTE(review): _fini below has no .regstk and crti.S allocates 3 locals -- confirm intended */
+ mov r12 = r32 /* restore r12 (sp) from the copy made in crti.S */
+ mov ar.pfs = r34 /* restore ar.pfs from the copy made in crti.S */
+ mov b0 = r33 /* restore the return pointer from the copy made in crti.S */
+ br.ret.sptk.many b0
+ .endp _init#
+
+ .section .fini,"ax",@progbits
+ .proc _fini#
+_fini:
+ .prologue /* annotations only: this region contains no instructions here */
+ .save ar.pfs, r34
+ .vframe r32
+ .save rp, r33
+ .body
+ mov r12 = r32 /* restore r12 (sp) from the copy made in crti.S */
+ mov ar.pfs = r34 /* restore ar.pfs from the copy made in crti.S */
+ mov b0 = r33 /* restore the return pointer from the copy made in crti.S */
+ br.ret.sptk.many b0
+ .endp _fini#
diff --git a/sysdeps/ia64/dl-dtprocnum.h b/sysdeps/ia64/dl-dtprocnum.h
new file mode 100644
index 0000000000..57bbabe6f3
--- /dev/null
+++ b/sysdeps/ia64/dl-dtprocnum.h
@@ -0,0 +1,21 @@
+/* Configuration of lookup functions. IA-64 version.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Number of extra dynamic section entries for this architecture:
+ IA-64 reserves room for DT_IA_64_NUM processor-specific tags. */
+#define DT_THISPROCNUM DT_IA_64_NUM
diff --git a/sysdeps/ia64/dl-fptr.h b/sysdeps/ia64/dl-fptr.h
new file mode 100644
index 0000000000..29622f020e
--- /dev/null
+++ b/sysdeps/ia64/dl-fptr.h
@@ -0,0 +1,45 @@
+/* Function descriptors. IA64 version.
+ Copyright (C) 2003-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef dl_ia64_fptr_h
+#define dl_ia64_fptr_h 1
+
+#include <ia64intrin.h>
+#include <sysdeps/generic/dl-fptr.h>
+
+#define COMPARE_AND_SWAP(ptr, old, new) /* thin wrapper over the GCC __sync builtin */ \
+ __sync_bool_compare_and_swap (ptr, old, new)
+
+/* There are currently 123 dynamic symbols in ld.so (NOTE(review): re-check this count).
+ ELF_MACHINE_BOOT_FPTR_TABLE_LEN needs to be at least that big. */
+#define ELF_MACHINE_BOOT_FPTR_TABLE_LEN 200
+
+#define ELF_MACHINE_LOAD_ADDRESS(var, symbol) /* var = @gprel(symbol) + gp; note the macro already ends in ';' */ \
+ asm ("movl %0 = @gprel (" #symbol ");; add %0 = %0, gp" : "=&r" (var));
+
+/* We don't have a gcc helper to extract the plabel info, so view PTR as a struct fdesc * through a union. */
+#define ELF_PTR_TO_FDESC(ptr) \
+ ({ union { \
+ void *_ptr; \
+ struct fdesc *_fdesc; \
+ } _u; \
+ _u._ptr = ptr; \
+ _u._fdesc; \
+ })
+
+#endif /* !dl_ia64_fptr_h */
diff --git a/sysdeps/ia64/dl-lookupcfg.h b/sysdeps/ia64/dl-lookupcfg.h
new file mode 100644
index 0000000000..2888854038
--- /dev/null
+++ b/sysdeps/ia64/dl-lookupcfg.h
@@ -0,0 +1,79 @@
+/* Configuration of lookup functions.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#define ELF_FUNCTION_PTR_IS_SPECIAL
+#define DL_UNMAP_IS_SPECIAL
+
+#include <dl-fptr.h>
+
+/* We do not support copy relocations for IA-64. */
+#define DL_NO_COPY_RELOCS
+
+/* Forward declaration. */
+struct link_map;
+
+extern void *_dl_symbol_address (struct link_map *map, const Elf64_Sym *ref);
+rtld_hidden_proto (_dl_symbol_address)
+
+#define DL_SYMBOL_ADDRESS(map, ref) _dl_symbol_address(map, ref)
+
+extern Elf64_Addr _dl_lookup_address (const void *address);
+
+#define DL_LOOKUP_ADDRESS(addr) _dl_lookup_address (addr)
+
+extern void attribute_hidden _dl_unmap (struct link_map *map);
+
+#define DL_UNMAP(map) _dl_unmap (map)
+
+#define DL_DT_FUNCTION_ADDRESS(map, start, attr, addr) /* declares fptr[] in the expanding scope */ \
+ attr volatile unsigned long int fptr[2]; \
+ fptr[0] = (unsigned long int) (start); /* first word: START */ \
+ fptr[1] = (map)->l_info[DT_PLTGOT]->d_un.d_ptr; /* second word: MAP's DT_PLTGOT value, presumably the gp slot as in DL_FIXUP_MAKE_VALUE */ \
+ addr = (ElfW(Addr)) fptr; /* ADDR = address of the two-word array */ \
+
+#define DL_CALL_DT_INIT(map, start, argc, argv, env) /* call START as a dl_init_t through a temporary two-word descriptor */ \
+{ \
+ ElfW(Addr) addr; \
+ DL_DT_FUNCTION_ADDRESS(map, start, , addr) \
+ dl_init_t init = (dl_init_t) addr; \
+ init (argc, argv, env); \
+}
+
+#define DL_CALL_DT_FINI(map, start) /* call START as a fini_t through a temporary two-word descriptor */ \
+{ \
+ ElfW(Addr) addr; \
+ DL_DT_FUNCTION_ADDRESS(map, start, , addr) \
+ fini_t fini = (fini_t) addr; \
+ fini (); \
+}
+
+/* The type of the return value of fixup/profile_fixup. */
+#define DL_FIXUP_VALUE_TYPE struct fdesc
+/* Construct a value of type DL_FIXUP_VALUE_TYPE from a code address
+ and a link map. */
+#define DL_FIXUP_MAKE_VALUE(map, addr) \
+ ((struct fdesc) { (addr), (map)->l_info[DT_PLTGOT]->d_un.d_ptr })
+/* Extract the code address from a value of type DL_FIXUP_VALUE_TYPE.
+ */
+#define DL_FIXUP_VALUE_CODE_ADDR(value) (value).ip
+
+#define DL_FIXUP_VALUE_ADDR(value) ((uintptr_t) &(value))
+#define DL_FIXUP_ADDR_VALUE(addr) (*(struct fdesc *) (addr))
+#define DL_FIXUP_BINDNOW_ADDR_VALUE(addr) (addr)
+#define DL_FIXUP_BINDNOW_RELOC(l, reloc, value, new_value, st_value, lazy) \
+ (*value) = *(struct fdesc *) (st_value)
diff --git a/sysdeps/ia64/dl-machine.h b/sysdeps/ia64/dl-machine.h
new file mode 100644
index 0000000000..2905574c31
--- /dev/null
+++ b/sysdeps/ia64/dl-machine.h
@@ -0,0 +1,460 @@
+/* Machine-dependent ELF dynamic relocation inline functions. IA-64 version.
+ Copyright (C) 1995-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef dl_machine_h
+#define dl_machine_h 1
+
+#define ELF_MACHINE_NAME "ia64"
+
+#include <assert.h>
+#include <string.h>
+#include <link.h>
+#include <errno.h>
+#include <dl-fptr.h>
+#include <tls.h>
+#include <dl-static-tls.h>
+#include <dl-machine-rel.h>
+
+/* Translate a processor specific dynamic tag to the index
+ in l_info array. */
+#define DT_IA_64(x) (DT_IA_64_##x - DT_LOPROC + DT_NUM)
+
+static inline void __attribute__ ((always_inline)) /* Point MAP's fptr table at _dl_boot_fptr_table. */
+__ia64_init_bootstrap_fdesc_table (struct link_map *map)
+{
+ Elf64_Addr *boot_table; /* receives the address of _dl_boot_fptr_table */
+
+ /* careful: this will be called before got has been relocated, hence the gp-relative address computation below. */
+ asm (";; addl %0 = @gprel (_dl_boot_fptr_table), gp" : "=r"(boot_table));
+
+ map->l_mach.fptr_table_len = ELF_MACHINE_BOOT_FPTR_TABLE_LEN;
+ map->l_mach.fptr_table = boot_table;
+}
+
+#define ELF_MACHINE_BEFORE_RTLD_RELOC(map, dynamic_info) /* dynamic_info is unused here */ \
+ __ia64_init_bootstrap_fdesc_table (map);
+
+/* Return nonzero iff ELF header is compatible with the running host. */
+static inline int __attribute__ ((unused))
+elf_machine_matches_host (const Elf64_Ehdr *ehdr)
+{
+ return ehdr->e_machine == EM_IA_64;
+}
+
+
+/* Return the link-time address of _DYNAMIC, read from a private .sdata word. */
+static inline Elf64_Addr __attribute__ ((unused, const))
+elf_machine_dynamic (void)
+{
+ Elf64_Addr *p; /* address of the __dynamic_ltv word emitted below */
+
+ __asm__ (
+ ".section .sdata\n"
+ " .type __dynamic_ltv#, @object\n"
+ " .size __dynamic_ltv#, 8\n"
+ "__dynamic_ltv:\n"
+ " data8 @ltv(_DYNAMIC#)\n"
+ ".previous\n"
+ " addl %0 = @gprel(__dynamic_ltv#), gp ;;"
+ : "=r" (p));
+
+ return *p; /* the @ltv(_DYNAMIC) value stored in that word */
+}
+
+
+/* Return the run-time load address of the shared object: run-time minus link-time address of one label. */
+static inline Elf64_Addr __attribute__ ((unused))
+elf_machine_load_address (void)
+{
+ Elf64_Addr ip; /* run-time value of ip at local label 1 */
+ int *p; /* gp-relative address of the word holding @ltv(1b) */
+
+ __asm__ (
+ "1: mov %0 = ip\n"
+ ".section .sdata\n"
+ "2: data4 @ltv(1b)\n"
+ " .align 8\n"
+ ".previous\n"
+ " addl %1 = @gprel(2b), gp ;;"
+ : "=r" (ip), "=r" (p));
+
+ return ip - (Elf64_Addr) *p; /* NOTE(review): the link-time value is a data4 read through int *, so only 32 bits are kept -- confirm that is sufficient */
+}
+
+/* Set up the loaded object described by L so its unrelocated PLT
+ entries will jump to the on-demand fixup code in dl-runtime.c. */
+
+static inline int __attribute__ ((unused, always_inline))
+elf_machine_runtime_setup (struct link_map *l, struct r_scope_elem *scope[],
+ int lazy, int profile)
+{
+ extern void _dl_runtime_resolve (void);
+ extern void _dl_runtime_profile (void);
+
+ if (lazy)
+ {
+ register Elf64_Addr gp __asm__ ("gp");
+ Elf64_Addr *reserve, doit;
+
+ /*
+ * Careful with the typecast here or it will try to add l->l_addr
+ * pointer elements (the sum must be formed before the cast).
+ */
+ reserve = ((Elf64_Addr *)
+ (l->l_info[DT_IA_64 (PLT_RESERVE)]->d_un.d_ptr + l->l_addr));
+ /* Identify this shared object. */
+ reserve[0] = (Elf64_Addr) l;
+
+ /* This function will be called to perform the relocation. */
+#ifdef SHARED
+ if (__glibc_unlikely (profile))
+ {
+ if (GLRO(dl_profile) != NULL
+ && _dl_name_match_p (GLRO(dl_profile), l))
+ {
+ /* This is the object we are looking for. Say that we really
+ want profiling and the timers are started. */
+ GL(dl_profile_map) = l;
+ }
+ doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_profile)->ip;
+ }
+ else
+#endif
+ {
+ doit = (Elf64_Addr) ELF_PTR_TO_FDESC (&_dl_runtime_resolve)->ip;
+ }
+
+ reserve[1] = doit; /* code address taken from the trampoline's descriptor */
+ reserve[2] = gp; /* value of the gp register as seen by this code */
+ }
+
+ return lazy; /* returned unchanged; SCOPE is not used in this function */
+}
+
+/* Names of the architecture-specific auditing callback functions. */
+#define ARCH_LA_PLTENTER ia64_gnu_pltenter
+#define ARCH_LA_PLTEXIT ia64_gnu_pltexit
+
+/* Undo the adds out0 = 16, sp below to get at the value we want in
+ __libc_stack_end. */
+#define DL_STACK_END(cookie) \
+ ((void *) (((long) (cookie)) - 16))
+
+/* Initial entry point code for the dynamic linker.
+ The C function `_dl_start' is the real entry point;
+ its return value is the user program's entry point (kept in loc2, then called via b6). */
+
+#define RTLD_START asm ( \
+".text\n" \
+" .global _start#\n" \
+" .proc _start#\n" \
+"_start:\n" \
+"0: { .mii\n" \
+" .prologue\n" \
+" .save rp, r0\n" \
+" .body\n" \
+" .prologue\n" \
+" .save ar.pfs, r32\n" \
+" alloc loc0 = ar.pfs, 0, 3, 4, 0\n" /* loc0 = incoming ar.pfs, restored before the final branch */ \
+" .body\n" \
+" mov r2 = ip\n" \
+" addl r3 = @gprel(0b), r0\n" \
+" ;;\n" \
+" }\n" \
+" { .mlx\n" \
+" /* Calculate the GP, and save a copy in loc1. */\n" \
+" sub gp = r2, r3\n" \
+" movl r8 = 0x9804c0270033f\n" /* r8 = value written to ar.fpsr just below */ \
+" ;;\n" \
+" }\n" \
+" { .mii\n" \
+" mov ar.fpsr = r8\n" \
+" sub loc1 = r2, r3\n" \
+" /* _dl_start wants a pointer to the pointer to the arg block and\n" \
+" the arg block starts with an integer, thus the magic 16. */\n" \
+" adds out0 = 16, sp\n" \
+" }\n" \
+" { .bbb\n" \
+" br.call.sptk.many b0 = _dl_start#\n" \
+" ;;\n" \
+" }\n" \
+" .endp _start#\n" \
+" /* FALLTHRU */\n" \
+" .global _dl_start_user#\n" \
+" .proc _dl_start_user#\n" \
+"_dl_start_user:\n" \
+" .prologue\n" \
+" .save rp, r0\n" \
+" .body\n" \
+" .prologue\n" \
+" .save ar.pfs, r32\n" \
+" .body\n" \
+" { .mii\n" \
+" /* Save the pointer to the user entry point fptr in loc2. */\n" \
+" mov loc2 = ret0\n" \
+" addl r2 = @ltoff(_dl_argc), gp\n" \
+" ;;\n" \
+" }\n" \
+" { .mii\n" \
+" ld8 out1 = [r2] /* Get the _dl_argc address. */\n" \
+" addl r3 = @ltoff(_dl_argv), gp\n" \
+" ;;\n" \
+" }\n" \
+" { .mmi\n" \
+" ld8 out2 = [r3] /* Get the _dl_argv address. */\n" \
+" ld8 out1 = [out1] /* Get the adjusted _dl_argc. */\n" \
+" addl r2 = @gprel(_rtld_local), gp\n" \
+" ;;\n" \
+" }\n" \
+" { .mmi\n" \
+" sxt4 out3 = out1 /* envp = argv + argc + 1 */\n" \
+" ;;\n" \
+" }\n" \
+" { .mmi\n" \
+" adds out3 = 1, out3\n" \
+" ;;\n" \
+" }\n" \
+" { .mmi\n" \
+" ld8 out2 = [out2] /* Get the adjusted _dl_argv. */\n" \
+" shladd out3 = out3, 3, r0\n" \
+" ;;\n" \
+" }\n" \
+" { .mmb\n" \
+" add out3 = out3, out2\n" \
+" ld8 out0 = [r2] /* Get the linkmap. */\n" \
+" br.call.sptk.many b0 = _dl_init#\n" \
+" }\n" \
+" /* Pass our finalizer function to the user,\n" \
+" and jump to the user's entry point. */\n" \
+" { .mmi\n" \
+" ld8 r3 = [loc2], 8\n" /* first fptr word -> r3; loc2 advances to the second word */ \
+" mov b0 = r0\n" \
+" }\n" \
+" { .mmi\n" \
+" addl ret0 = @ltoff(@fptr(_dl_fini#)), gp\n" \
+" ;;\n" \
+" mov b6 = r3\n" \
+" }\n" \
+" { .mmi\n" \
+" ld8 ret0 = [ret0]\n" \
+" ld8 gp = [loc2]\n" /* second fptr word becomes the new gp */ \
+" mov ar.pfs = loc0\n" \
+" ;;\n" \
+" }\n" \
+" { .mfb\n" \
+" br.sptk.many b6\n" \
+" ;;\n" \
+" }\n" \
+" .endp _dl_start_user#\n" \
+".previous\n");
+
+
+#ifndef RTLD_START_SPECIAL_INIT
+#define RTLD_START_SPECIAL_INIT /* nothing */
+#endif
+
+/* ELF_RTYPE_CLASS_PLT iff TYPE describes relocation of a PLT entry or TLS
+ variable, so undefined references should not be allowed to define the
+ value.
+ ELF_RTYPE_CLASS_COPY iff TYPE should not be allowed to resolve to one
+ of the main executable's symbols, as for a COPY reloc, which we don't
+ use. */
+/* ??? Ignore *MSB for now. */
+#define elf_machine_type_class(type) \
+ (((type) == R_IA64_IPLTLSB || (type) == R_IA64_DTPMOD64LSB \
+ || (type) == R_IA64_DTPREL64LSB || (type) == R_IA64_TPREL64LSB) \
+ * ELF_RTYPE_CLASS_PLT)
+
+/* A reloc type used for ld.so cmdline arg lookups to reject PLT entries. */
+#define ELF_MACHINE_JMP_SLOT R_IA64_IPLTLSB
+
+/* Return the address of the entry point, as the address of a two-word array built from START and MAP. */
+#define ELF_MACHINE_START_ADDRESS(map, start) \
+({ \
+ ElfW(Addr) addr; \
+ DL_DT_FUNCTION_ADDRESS(map, start, static, addr) /* static: fptr[] must outlive this statement expression */ \
+ addr; \
+})
+
+/* Fixup a PLT entry to bounce directly to the function at VALUE. Only RELOC_ADDR and VALUE are used. */
+static inline struct fdesc __attribute__ ((always_inline))
+elf_machine_fixup_plt (struct link_map *l, lookup_t t,
+ const ElfW(Sym) *refsym, const ElfW(Sym) *sym,
+ const Elf64_Rela *reloc,
+ Elf64_Addr *reloc_addr, struct fdesc value)
+{
+ /* l is the link_map for the caller, t is the link_map for the object
+ * being called */
+ /* got has already been relocated in elf_get_dynamic_info() */
+ reloc_addr[1] = value.gp; /* second word first ... */
+ /* we need a "release" here to ensure that the gp is visible before
+ the code entry point is updated: */
+ ((volatile Elf64_Addr *) reloc_addr)[0] = value.ip; /* ... then the entry point, through a volatile lvalue */
+ return value;
+}
+
+/* Return the final value of a plt relocation: VALUE with the addend applied to its ip; MAP is unused. */
+static inline struct fdesc
+elf_machine_plt_value (struct link_map *map, const Elf64_Rela *reloc,
+ struct fdesc value)
+{
+ /* No need to handle rel vs rela since IA64 is rela only */
+ return (struct fdesc) { value.ip + reloc->r_addend, value.gp };
+}
+
+#endif /* !dl_machine_h */
+
+#ifdef RESOLVE_MAP
+
+#define R_IA64_TYPE(R) ((R) & -8)
+#define R_IA64_FORMAT(R) ((R) & 7)
+
+#define R_IA64_FORMAT_32MSB 4
+#define R_IA64_FORMAT_32LSB 5
+#define R_IA64_FORMAT_64MSB 6
+#define R_IA64_FORMAT_64LSB 7
+
+
+/* Perform the relocation specified by RELOC and SYM (which is fully
+ resolved). MAP is the object containing the reloc. */
+static inline void
+__attribute ((always_inline))
+elf_machine_rela (struct link_map *map, struct r_scope_elem *scope[],
+ const Elf64_Rela *reloc,
+ const Elf64_Sym *sym,
+ const struct r_found_version *version,
+ void *const reloc_addr_arg,
+ int skip_ifunc)
+{
+ Elf64_Addr *const reloc_addr = reloc_addr_arg;
+ const unsigned long int r_type = ELF64_R_TYPE (reloc->r_info);
+ Elf64_Addr value;
+
+ /* We cannot use a switch here because we cannot locate the switch
+ jump table until we've self-relocated. */
+
+#if !defined RTLD_BOOTSTRAP
+ if (__builtin_expect (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_REL64LSB),
+ 0))
+ {
+ assert (ELF64_R_TYPE (reloc->r_info) == R_IA64_REL64LSB);
+ value = *reloc_addr + map->l_addr;
+ }
+ else
+#endif
+ if (__builtin_expect (r_type == R_IA64_NONE, 0))
+ return;
+ else
+ {
+ struct link_map *sym_map = RESOLVE_MAP (map, scope, &sym, version,
+ r_type);
+
+ /* RESOLVE_MAP() will return NULL if it fails to locate the symbol. */
+ if (sym_map != NULL)
+ {
+ value = SYMBOL_ADDRESS (sym_map, sym, true) + reloc->r_addend;
+
+ if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_DIR64LSB))
+ ;/* No adjustment. */
+ else if (r_type == R_IA64_IPLTLSB)
+ {
+ elf_machine_fixup_plt (NULL, NULL, NULL, NULL, reloc, reloc_addr,
+ DL_FIXUP_MAKE_VALUE (sym_map, value));
+ return;
+ }
+ else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_FPTR64LSB))
+ value = _dl_make_fptr (sym_map, sym, value);
+ else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_PCREL64LSB))
+ value -= (Elf64_Addr) reloc_addr & -16; /* relative to reloc_addr rounded down to 16 bytes */
+ else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_DTPMOD64LSB))
+#ifdef RTLD_BOOTSTRAP
+ /* During startup the dynamic linker is always index 1. */
+ value = 1;
+#else /* !RTLD_BOOTSTRAP: the DTPREL branch exists only in this arm */
+ /* Get the information from the link map returned by the
+ resolv function. */
+ value = sym_map->l_tls_modid;
+ else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_DTPREL64LSB))
+ value -= sym_map->l_addr;
+#endif
+ else if (R_IA64_TYPE (r_type) == R_IA64_TYPE (R_IA64_TPREL64LSB))
+ {
+#ifndef RTLD_BOOTSTRAP
+ CHECK_STATIC_TLS (map, sym_map);
+#endif
+ value += sym_map->l_tls_offset - sym_map->l_addr;
+ }
+ else
+ _dl_reloc_bad_type (map, r_type, 0);
+ }
+ else
+ value = 0;
+ }
+
+ /* ??? Ignore MSB and Instruction format for now. */
+ if (R_IA64_FORMAT (r_type) == R_IA64_FORMAT_64LSB)
+ *reloc_addr = value;
+ else if (R_IA64_FORMAT (r_type) == R_IA64_FORMAT_32LSB)
+ *(int *) reloc_addr = value; /* store through int *: only an int's worth of VALUE is written */
+ else if (r_type == R_IA64_IPLTLSB)
+ {
+ reloc_addr[0] = 0; /* reached only when the symbol was not found; the found case returned above */
+ reloc_addr[1] = 0;
+ }
+ else
+ _dl_reloc_bad_type (map, r_type, 0);
+}
+
+/* Let do-rel.h know that on IA-64 if l_addr is 0, all RELATIVE relocs
+ can be skipped. */
+#define ELF_MACHINE_REL_RELATIVE 1
+
+static inline void
+__attribute ((always_inline))
+elf_machine_rela_relative (Elf64_Addr l_addr, const Elf64_Rela *reloc,
+ void *const reloc_addr_arg)
+{
+ Elf64_Addr *const reloc_addr = reloc_addr_arg;
+ /* ??? Ignore MSB and Instruction format for now. */
+ assert (ELF64_R_TYPE (reloc->r_info) == R_IA64_REL64LSB); /* RELOC is consulted only for this check */
+
+ *reloc_addr += l_addr; /* rebase the stored 64-bit word by the load address */
+}
+
+/* Perform a RELATIVE reloc on the .got entry that transfers to the .plt. */
+static inline void
+__attribute ((always_inline))
+elf_machine_lazy_rel (struct link_map *map, struct r_scope_elem *scope[],
+ Elf64_Addr l_addr, const Elf64_Rela *reloc,
+ int skip_ifunc)
+{
+ Elf64_Addr *const reloc_addr = (void *) (l_addr + reloc->r_offset);
+ const unsigned long int r_type = ELF64_R_TYPE (reloc->r_info);
+
+ if (r_type == R_IA64_IPLTLSB)
+ {
+ reloc_addr[0] += l_addr; /* rebase both 8-byte words of the entry */
+ reloc_addr[1] += l_addr;
+ }
+ else if (r_type == R_IA64_NONE)
+ return;
+ else
+ _dl_reloc_bad_type (map, r_type, 1); /* any other type is rejected; SCOPE and SKIP_IFUNC are unused */
+}
+
+#endif /* RESOLVE_MAP */
diff --git a/sysdeps/ia64/dl-tls.h b/sysdeps/ia64/dl-tls.h
new file mode 100644
index 0000000000..c7af27698c
--- /dev/null
+++ b/sysdeps/ia64/dl-tls.h
@@ -0,0 +1,41 @@
+/* Thread-local storage handling in the ELF dynamic linker. IA-64 version.
+ Copyright (C) 2002-2025 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#ifndef _DL_TLS_H
+#define _DL_TLS_H
+
+/* On IA-64 the __tls_get_addr function takes the module ID and the
+ offset as parameters. */
+#define GET_ADDR_ARGS size_t tls_ia64_m, size_t tls_ia64_offset
+#define GET_ADDR_PARAM tls_ia64_m, tls_ia64_offset
+#define GET_ADDR_MODULE tls_ia64_m
+#define GET_ADDR_OFFSET tls_ia64_offset
+
+/* We have no tls_index type. */
+#define DONT_USE_TLS_INDEX 1
+
+/* Dynamic thread vector pointers point to the start of each
+ TLS block. */
+#define TLS_DTV_OFFSET 0
+
+/* Static TLS offsets are relative to the unadjusted thread pointer. */
+#define TLS_TP_OFFSET 0
+
+extern void *__tls_get_addr (size_t m, size_t offset);
+
+#endif /* _DL_TLS_H */
diff --git a/sysdeps/ia64/dl-trampoline.S b/sysdeps/ia64/dl-trampoline.S
new file mode 100644
index 0000000000..d2c3c6dcf6
--- /dev/null
+++ b/sysdeps/ia64/dl-trampoline.S
@@ -0,0 +1,538 @@
+/* PLT trampolines. ia64 version.
+ Copyright (C) 2005-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+#include <sysdep.h>
+#undef ret
+
+/*
+ This code is used in dl-runtime.c to call the `_dl_fixup' function
+ and then redirect to the address it returns. `_dl_fixup()' takes two
+ arguments, however _dl_profile_fixup() takes five.
+
+ The ABI specifies that we will never see more than 8 input
+ registers to a function call, thus it is safe to simply allocate
+ those, and simpler than playing stack games. */
+
+/* Used to save and restore 8 incoming fp registers */
+#define RESOLVE_FRAME_SIZE (16*8)
+
+ENTRY(_dl_runtime_resolve)
+ { .mmi
+ .prologue
+ .save ar.pfs, r40
+ alloc loc0 = ar.pfs, 8, 6, 2, 0
+ /* Use the 16 byte scratch area. r2 will start at f8 and
+ r3 will start at f9. */
+ adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
+ adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
+ }
+ { .mii
+ .fframe RESOLVE_FRAME_SIZE
+ adds r12 = -RESOLVE_FRAME_SIZE, r12
+ .save rp, loc1
+ mov loc1 = b0
+ .body
+ mov loc2 = r8 /* preserve struct value register */
+ ;;
+ }
+ { .mii
+ mov loc3 = r9 /* preserve language specific register */
+ mov loc4 = r10 /* preserve language specific register */
+ mov loc5 = r11 /* preserve language specific register */
+ }
+ { .mmi
+ stf.spill [r2] = f8, 32
+ stf.spill [r3] = f9, 32
+ mov out0 = r16
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f10, 32
+ stf.spill [r3] = f11, 32
+ shl out1 = r15, 4
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f12, 32
+ stf.spill [r3] = f13, 32
+ /* Relocation record is 24 byte. */
+ shladd out1 = r15, 3, out1
+ ;;
+ }
+ { .mmb
+ stf.spill [r2] = f14
+ stf.spill [r3] = f15
+ br.call.sptk.many b0 = _dl_fixup
+ }
+ { .mii
+ /* Skip the 16byte scratch area. */
+ adds r2 = 16, r12
+ adds r3 = 32, r12
+ mov b6 = ret0
+ ;;
+ }
+ { .mmi
+ ldf.fill f8 = [r2], 32
+ ldf.fill f9 = [r3], 32
+ mov b0 = loc1
+ ;;
+ }
+ { .mmi
+ ldf.fill f10 = [r2], 32
+ ldf.fill f11 = [r3], 32
+ mov gp = ret1
+ ;;
+ }
+ { .mmi
+ ldf.fill f12 = [r2], 32
+ ldf.fill f13 = [r3], 32
+ mov ar.pfs = loc0
+ ;;
+ }
+ { .mmi
+ ldf.fill f14 = [r2], 32
+ ldf.fill f15 = [r3], 32
+ .restore sp /* pop the unwind frame state */
+ adds r12 = RESOLVE_FRAME_SIZE, r12
+ ;;
+ }
+ { .mii
+ mov r9 = loc3 /* restore language specific register */
+ mov r10 = loc4 /* restore language specific register */
+ mov r11 = loc5 /* restore language specific register */
+ }
+ { .mii
+ mov r8 = loc2 /* restore struct value register */
+ ;;
+ }
+ /* An alloc is needed for the break system call to work.
+ We don't care about the old value of the pfs register. */
+ { .mmb
+ .prologue
+ .body
+ alloc r2 = ar.pfs, 0, 0, 8, 0
+ br.sptk.many b6
+ ;;
+ }
+END(_dl_runtime_resolve)
+
+
+/* The fourth argument to _dl_profile_fixup and the third one to
+ _dl_audit_pltexit are a pointer to La_ia64_regs:
+
+ 8byte r8
+ 8byte r9
+ 8byte r10
+ 8byte r11
+ 8byte in0
+ 8byte in1
+ 8byte in2
+ 8byte in3
+ 8byte in4
+ 8byte in5
+ 8byte in6
+ 8byte in7
+ 16byte f8
+ 16byte f9
+ 16byte f10
+ 16byte f11
+ 16byte f12
+ 16byte f13
+ 16byte f14
+ 16byte f15
+ 8byte ar.unat
+ 8byte sp
+
+ The fifth argument to _dl_profile_fixup is a pointer to long int.
+ The fourth argument to _dl_audit_pltexit is a pointer to
+ La_ia64_retval:
+
+ 8byte r8
+ 8byte r9
+ 8byte r10
+ 8byte r11
+ 16byte f8
+ 16byte f9
+ 16byte f10
+ 16byte f11
+ 16byte f12
+ 16byte f13
+ 16byte f14
+ 16byte f15
+
+ Since stack has to be 16 byte aligned, the stack allocation is in
+ 16byte increment. Before calling _dl_profile_fixup, the stack will
+ look like
+
+ psp new frame_size
+ +16 La_ia64_regs
+ sp scratch
+
+ */
+
+#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
+#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
+
+#if !defined PROF && defined SHARED
+ENTRY(_dl_runtime_profile)
+ { .mii
+ .prologue
+ .save ar.pfs, r40
+ alloc loc0 = ar.pfs, 8, 12, 8, 0
+ .vframe loc10
+ mov loc10 = r12
+ .save rp, loc1
+ mov loc1 = b0
+ }
+ { .mii
+ .save ar.unat, r17
+ mov r17 = ar.unat
+ .save ar.lc, loc6
+ mov loc6 = ar.lc
+ mov loc11 = gp
+ }
+ { .mii
+ .body
+ /* There is a 16 byte scratch area. r2 will start at r8 and
+ r3 will start at r9 for La_ia64_regs. */
+ adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
+ adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
+ adds r12 = -PLTENTER_FRAME_SIZE, r12
+ ;;
+ }
+ { .mmi
+ st8 [r2] = r8, 16;
+ st8 [r3] = r9, 16;
+ mov out2 = b0 /* needed by _dl_profile_fixup */
+ ;;
+ }
+ { .mmi
+ st8 [r2] = r10, 16;
+ st8 [r3] = r11, 16;
+ adds out3 = 16, r12 /* pointer to La_ia64_regs */
+ ;;
+ }
+ { .mmi
+ .mem.offset 0, 0
+ st8.spill [r2] = in0, 16
+ .mem.offset 8, 0
+ st8.spill [r3] = in1, 16
+ mov out4 = loc10 /* pointer to new frame size */
+ ;;
+ }
+ { .mmi
+ .mem.offset 0, 0
+ st8.spill [r2] = in2, 16
+ .mem.offset 8, 0
+ st8.spill [r3] = in3, 16
+ mov loc2 = r8 /* preserve struct value register */
+ ;;
+ }
+ { .mmi
+ .mem.offset 0, 0
+ st8.spill [r2] = in4, 16
+ .mem.offset 8, 0
+ st8.spill [r3] = in5, 16
+ mov loc3 = r9 /* preserve language specific register */
+ ;;
+ }
+ { .mmi
+ .mem.offset 0, 0
+ st8 [r2] = in6, 16
+ .mem.offset 8, 0
+ st8 [r3] = in7, 24 /* adjust for f9 */
+ mov loc4 = r10 /* preserve language specific register */
+ ;;
+ }
+ { .mii
+ mov r18 = ar.unat /* save it in La_ia64_regs */
+ mov loc7 = out3 /* save it for _dl_audit_pltexit */
+ mov loc5 = r11 /* preserve language specific register */
+ }
+ { .mmi
+ stf.spill [r2] = f8, 32
+ stf.spill [r3] = f9, 32
+ mov out0 = r16 /* needed by _dl_profile_fixup */
+ ;;
+ }
+ { .mii
+ mov ar.unat = r17 /* restore it for function call */
+ mov loc8 = r16 /* save it for _dl_audit_pltexit */
+ nop.i 0x0
+ }
+ { .mmi
+ stf.spill [r2] = f10, 32
+ stf.spill [r3] = f11, 32
+ shl out1 = r15, 4
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f12, 32
+ stf.spill [r3] = f13, 32
+ /* Relocation record is 24 byte. */
+ shladd out1 = r15, 3, out1
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f14, 32
+ stf.spill [r3] = f15, 24
+ mov loc9 = out1 /* save it for _dl_audit_pltexit */
+ ;;
+ }
+ { .mmb
+ st8 [r2] = r18 /* store ar.unat */
+ st8 [r3] = loc10 /* store sp */
+ br.call.sptk.many b0 = _dl_profile_fixup
+ }
+ { .mii
+ /* Skip the 16byte scratch area, 4 language specific GRs and
+ 8 incoming GRs to restore incoming fp registers. */
+ adds r2 = (4*8 + 8*8 + 16), r12
+ adds r3 = (4*8 + 8*8 + 32), r12
+ mov b6 = ret0
+ ;;
+ }
+ { .mmi
+ ldf.fill f8 = [r2], 32
+ ldf.fill f9 = [r3], 32
+ mov gp = ret1
+ ;;
+ }
+ { .mmi
+ ldf.fill f10 = [r2], 32
+ ldf.fill f11 = [r3], 32
+ mov r8 = loc2 /* restore struct value register */
+ ;;
+ }
+ { .mmi
+ ldf.fill f12 = [r2], 32
+ ldf.fill f13 = [r3], 32
+ mov r9 = loc3 /* restore language specific register */
+ ;;
+ }
+ { .mmi
+ ldf.fill f14 = [r2], 32
+ ldf.fill f15 = [r3], 32
+ mov r10 = loc4 /* restore language specific register */
+ ;;
+ }
+ { .mii
+ ld8 r15 = [loc10] /* load the new frame size */
+ mov r11 = loc5 /* restore language specific register */
+ ;;
+ cmp.eq p6, p7 = -1, r15
+ ;;
+ }
+ { .mii
+(p7) cmp.eq p8, p9 = 0, r15
+(p6) mov b0 = loc1
+(p6) mov ar.lc = loc6
+ }
+ { .mib
+ nop.m 0x0
+(p6) mov ar.pfs = loc0
+(p6) br.cond.dptk.many .Lresolved
+ ;;
+ }
+
+ /* At this point, the stack looks like
+
+ +psp free
+ +16 La_ia64_regs
+ sp scratch
+
+ We need to keep the current stack and call the resolved
+ function by copying r15 bytes from sp + PLTENTER_FRAME_SIZE
+ + 16 (scratch area) to sp + 16 (scratch area). Since the stack
+ has to be 16-byte aligned, we round r15 up to a multiple of 16. */
+
+ { .mbb
+(p9) adds r15 = 15, r15
+(p8) br.cond.dptk.many .Lno_new_frame
+ nop.b 0x0
+ ;;
+ }
+ { .mmi
+ and r15 = -16, r15
+ ;;
+ /* We don't copy the 16byte scratch area. Prepare r16/r17 as
+ destination. */
+ sub r16 = r12, r15
+ sub r17 = r12, r15
+ ;;
+ }
+ { .mii
+ adds r16 = 16, r16
+ adds r17 = 24, r17
+ sub r12 = r12, r15 /* Adjust stack */
+ ;;
+ }
+ { .mii
+ nop.m 0x0
+ shr r15 = r15, 4
+ ;;
+ adds r15 = -1, r15
+ ;;
+ }
+ { .mii
+ /* Skip the 16byte scratch area. Prepare r2/r3 as source. */
+ adds r2 = 16, loc10
+ adds r3 = 24, loc10
+ mov ar.lc = r15
+ ;;
+ }
+.Lcopy:
+ { .mmi
+ ld8 r18 = [r2], 16
+ ld8 r19 = [r3], 16
+ nop.i 0x0
+ ;;
+ }
+ { .mmb
+ st8 [r16] = r18, 16
+ st8 [r17] = r19, 16
+ br.cloop.sptk.few .Lcopy
+ }
+.Lno_new_frame:
+ { .mii
+ mov out0 = in0
+ mov out1 = in1
+ mov out2 = in2
+ }
+ { .mii
+ mov out3 = in3
+ mov out4 = in4
+ mov out5 = in5
+ }
+ { .mib
+ mov out6 = in6
+ mov out7 = in7
+ /* Call the resolved function */
+ br.call.sptk.many b0 = b6
+ }
+ { .mii
+ /* Prepare stack for _dl_audit_pltexit. Loc10 has the original
+ stack pointer. */
+ adds r12 = -PLTEXIT_FRAME_SIZE, loc10
+ adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
+ adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
+ ;;
+ }
+ { .mmi
+ /* Store all possible return values into the buffer. */
+ st8 [r2] = r8, 16
+ st8 [r3] = r9, 16
+ mov out0 = loc8
+ ;;
+ }
+ { .mmi
+ st8 [r2] = r10, 16
+ st8 [r3] = r11, 24
+ mov out1 = loc9
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f8, 32
+ stf.spill [r3] = f9, 32
+ mov out2 = loc7 /* Pointer to La_ia64_regs */
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f10, 32
+ stf.spill [r3] = f11, 32
+ adds out3 = 16, r12 /* Pointer to La_ia64_retval */
+ ;;
+ }
+ { .mmi
+ stf.spill [r2] = f12, 32
+ stf.spill [r3] = f13, 32
+ /* We need to restore gp for _dl_audit_pltexit. */
+ mov gp = loc11
+ ;;
+ }
+ { .mmb
+ stf.spill [r2] = f14
+ stf.spill [r3] = f15
+ br.call.sptk.many b0 = _dl_audit_pltexit
+ }
+ { .mmi
+ /* Load all the non-floating and floating return values. Skip
+ the 16byte scratch area. */
+ adds r2 = 16, r12
+ adds r3 = 24, r12
+ nop.i 0x0
+ ;;
+ }
+ { .mmi
+ ld8 r8 = [r2], 16
+ ld8 r9 = [r3], 16
+ nop.i 0x0
+ ;;
+ }
+ { .mmi
+ ld8 r10 = [r2], 16
+ ld8 r11 = [r3], 24
+ nop.i 0x0
+ ;;
+ }
+ { .mmi
+ ldf.fill f8 = [r2], 32
+ ldf.fill f9 = [r3], 32
+ mov ar.lc = loc6
+ ;;
+ }
+ { .mmi
+ ldf.fill f10 = [r2], 32
+ ldf.fill f11 = [r3], 32
+ mov ar.pfs = loc0
+ ;;
+ }
+ { .mmi
+ ldf.fill f12 = [r2], 32
+ ldf.fill f13 = [r3], 32
+ mov b0 = loc1
+ ;;
+ }
+ { .mmi
+ ldf.fill f14 = [r2]
+ ldf.fill f15 = [r3]
+ /* We know that the previous stack pointer, loc10, isn't 0.
+ We use it to reload p7. */
+ cmp.ne p7, p0 = 0, loc10
+ ;;
+ }
+.Lresolved:
+ { .mmb
+ .restore sp
+ mov r12 = loc10
+(p7) br.ret.sptk.many b0
+ ;;
+ }
+ /* An alloc is needed for the break system call to work. We
+ don't care about the old value of the pfs register. After
+ this alloc, we can't use any rotating registers. Otherwise
+ assembler won't be happy. This has to be at the end. */
+ { .mmb
+ .prologue
+ .body
+ alloc r2 = ar.pfs, 0, 0, 8, 0
+ br.sptk.many b6
+ ;;
+ }
+END(_dl_runtime_profile)
+#endif
diff --git a/sysdeps/ia64/elf-initfini.h b/sysdeps/ia64/elf-initfini.h
new file mode 100644
index 0000000000..02290f4b8a
--- /dev/null
+++ b/sysdeps/ia64/elf-initfini.h
@@ -0,0 +1,20 @@
+/* Determine DT_INIT/DT_FINI support in the dynamic loader. IA64 version.
+ Copyright (C) 2020-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <https://www.gnu.org/licenses/>. */
+
+/* Enable DT_INIT/DT_FINI support. */
+#define ELF_INITFINI 1
diff --git a/sysdeps/ia64/entry.h b/sysdeps/ia64/entry.h
new file mode 100644
index 0000000000..9ae8b8615c
--- /dev/null
+++ b/sysdeps/ia64/entry.h
@@ -0,0 +1,8 @@
+#include <link.h>
+#include <dl-fptr.h>
+
+extern void _start (void);
+
+/* The function's entry point is stored in the first word of the
+ function descriptor (plabel) of _start(). */
+#define ENTRY_POINT ELF_PTR_TO_FDESC (_start)->ip
diff --git a/sysdeps/ia64/float128-abi.h b/sysdeps/ia64/float128-abi.h
new file mode 100644
index 0000000000..8e7616bc7e
--- /dev/null
+++ b/sysdeps/ia64/float128-abi.h
@@ -0,0 +1,3 @@
+/* ABI version for _Float128 ABI introduction. */
+#define FLOAT128_VERSION GLIBC_2.26
+#define FLOAT128_VERSION_M GLIBC_2_26
diff --git a/sysdeps/ia64/fpu/Makefile b/sysdeps/ia64/fpu/Makefile
new file mode 100644
index 0000000000..384fc836af
--- /dev/null
+++ b/sysdeps/ia64/fpu/Makefile
@@ -0,0 +1,34 @@
+ifeq ($(subdir),math)
+#
+# Some files which need to go both into libc and libm have external
+# dependencies which need to be resolved differently for libc
+# vs. libm. For example, inside libc, __libm_error_support needs to
+# resolve to HIDDEN_JUMPTARGET(__libm_error_support) whereas within
+# libm it always resolves to __libm_error_support. Such files need to
+# be compiled twice. Fortunately, math/Makefile already has logic to
+# support this: if a file starts with "s_", make will automatically
+# generate a matching file whose name starts with "m_" which simply
+# includes the corresponding "s_" file.
+#
+duplicated-routines = s_libm_ldexp s_libm_ldexpf s_libm_ldexpl \
+ s_libm_scalbn s_libm_scalbnf s_libm_scalbnl
+
+libm-sysdep_routines += s_erfc s_erfcf s_erfcl \
+ s_matherrf s_matherrl libm_reduce \
+ libm_error \
+ libm_frexp libm_frexpf libm_frexpl \
+ libm_sincos libm_sincosf libm_sincosl \
+ libm_sincos_large \
+ libm_lgamma libm_lgammaf libm_lgammal \
+ libm_scalblnf \
+ $(duplicated-routines:s_%=m_%)
+
+sysdep_routines += libc_libm_error libm_frexp libm_frexpf libm_frexpl \
+ $(duplicated-routines)
+
+sysdep-CPPFLAGS += -include libm-symbols.h \
+ -D__POSIX__ -Dopensource \
+ -D_LIB_VERSIONIMF=_LIB_VERSION \
+ -DSIZE_INT_32 -DSIZE_LONG_INT_64 -DSIZE_LONG_LONG_INT_64 \
+ -DSIZE_LONG_64 -DIA64
+endif
diff --git a/sysdeps/ia64/fpu/README b/sysdeps/ia64/fpu/README
new file mode 100644
index 0000000000..6f4af0678a
--- /dev/null
+++ b/sysdeps/ia64/fpu/README
@@ -0,0 +1,50 @@
+ ----------------------------------------------------------
+ Notes on how to update libm based on Intel's libm releases
+ ----------------------------------------------------------
+
+The source code in this directory is currently based on Intel libm
+v2.1 as available from:
+
+ http://www.intel.com/software/products/opensource/libraries/num.htm
+
+To ease importing, fix some bugs, and simplify integration into libc,
+it is also necessary to apply the patch at:
+
+ ftp://ftp.hpl.hp.com/pub/linux-ia64/intel-libm-041228.diff.gz
+
+The expectation is that Intel will integrate most if not all of these
+changes into future releases of libm, so this patching step can
+hopefully be omitted in the future.
+
+Once the patched libm sources are extracted in a directory $LIBM, they
+can be imported into the libc source tree at $LIBC with the following
+step:
+
+ $ cd $LIBC/src/sysdep/ia64/fpu
+ $ ./import_intel_libm $LIBM
+
+This should produce a number of "Importing..." messages, without
+showing any errors.
+
+At this point, you should be able to build glibc in the usual fashion.
+We assume you do this in directory $OBJ. Once the build has
+completed, run "make check" to verify that all (math) checks succeed.
+If these checks succeed, you should also run the following commands to
+verify that the new libm doesn't pollute the name-space and has proper
+size-info for the data objects:
+
+ $ cd $LIBC/src/sysdep/ia64/fpu
+ $ import_check $OBJ/math/
+
+There should be no (unexpected) errors reported by this script.
+
+As an optional step, you may also want to confirm that the new libm
+exports the exact same global symbols as the old one.
+
+If you want to see the changes introduced by the "import_intel_libm"
+script, you can run the commands:
+
+ $ cd $LIBC/src/sysdep/ia64/fpu
+ $ import_diffs
+
+That's it.
diff --git a/sysdeps/ia64/fpu/Versions b/sysdeps/ia64/fpu/Versions
new file mode 100644
index 0000000000..1faea6458d
--- /dev/null
+++ b/sysdeps/ia64/fpu/Versions
@@ -0,0 +1,10 @@
+libc {
+ GLIBC_PRIVATE {
+ __libm_frexp_4; __libm_frexp_4f; __libm_frexp_4l; __libm_error_support;
+ }
+}
+libm {
+ GLIBC_2.2.3 {
+ matherrf; matherrl;
+ }
+}
diff --git a/sysdeps/ia64/fpu/branred.c b/sysdeps/ia64/fpu/branred.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/branred.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/doasin.c b/sysdeps/ia64/fpu/doasin.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/doasin.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/dosincos.c b/sysdeps/ia64/fpu/dosincos.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/dosincos.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_acos.S b/sysdeps/ia64/fpu/e_acos.S
new file mode 100644
index 0000000000..9543e31cdc
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acos.S
@@ -0,0 +1,878 @@
+.file "acos.s"
+
+
+// Copyright (c) 2000 - 2003 Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 08/17/00 New and much faster algorithm.
+// 08/30/00 Avoided bank conflicts on loads, shortened |x|=1 and x=0 paths,
+// fixed mfb split issue stalls.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/02/02 New and much faster algorithm II
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+
+// Description
+//=========================================
+// The acos function computes the principal value of the arc cosine of x.
+// acos(0) returns Pi/2, acos(1) returns 0, acos(-1) returns Pi.
+// A domain error occurs for arguments not in the range [-1,+1].
+//
+// The acos function returns the arc cosine in the range [0, Pi] radians.
+//
+// There are 8 paths:
+// 1. x = +/-0.0
+// Return acos(x) = Pi/2 + x
+//
+// 2. 0.0 < |x| < 0.625
+// Return acos(x) = Pi/2 - x - x^3 *PolA(x^2)
+// where PolA(x^2) = A3 + A5*x^2 + A7*x^4 +...+ A35*x^32
+//
+// 3. 0.625 <=|x| < 1.0
+// Return acos(x) = Pi/2 - asin(x) =
+// = Pi/2 - sign(x) * ( Pi/2 - sqrt(R) * PolB(R))
+// Where R = 1 - |x|,
+// PolB(R) = B0 + B1*R + B2*R^2 +...+B12*R^12
+//
+// sqrt(R) is approximated using the following sequence:
+// y0 = (1 + eps)/sqrt(R) - initial approximation by frsqrta,
+// |eps| < 2^(-8)
+// Then 3 iterations are used to refine the result:
+// H0 = 0.5*y0
+// S0 = R*y0
+//
+// d0 = 0.5 - H0*S0
+// H1 = H0 + d0*H0
+// S1 = S0 + d0*S0
+//
+// d1 = 0.5 - H1*S1
+// H2 = H1 + d1*H1
+// S2 = S1 + d1*S1
+//
+// d2 = 0.5 - H2*S2
+// S3 = S2 + d2*S2
+//
+// S3 approximates sqrt(R) with enough accuracy for this algorithm
+//
+// So, the result should be reconstructed as follows:
+// acos(x) = Pi/2 - sign(x) * (Pi/2 - S3*PolB(R))
+//
+// But for optimization purposes the reconstruction step is slightly
+// changed:
+// acos(x) = Cpi + sign(x)*PolB(R)*S2 - sign(x)*d2*S2*PolB(R)
+// where Cpi = 0 if x > 0 and Cpi = Pi if x < 0
+//
+// 4. |x| = 1.0
+// Return acos(1.0) = 0.0, acos(-1.0) = Pi
+//
+// 5. 1.0 < |x| <= +INF
+// A domain error occurs for arguments not in the range [-1,+1]
+//
+// 6. x = [S,Q]NaN
+// Return acos(x) = QNaN
+//
+// 7. x is denormal
+// Return acos(x) = Pi/2 - x,
+//
+// 8. x is unnormal
+// Normalize input in f8 and return to the very beginning of the function
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6, f7, f9 -> f15, f32 -> f64
+
+// General registers used:
+// r3, r21 -> r31, r32 -> r38
+
+// Predicate registers used:
+// p0, p6 -> p14
+
+//
+// Assembly macros
+//=========================================
+// integer registers used
+// scratch
+rTblAddr = r3
+
+rPiBy2Ptr = r21
+rTmpPtr3 = r22
+rDenoBound = r23
+rOne = r24
+rAbsXBits = r25
+rHalf = r26
+r0625 = r27
+rSign = r28
+rXBits = r29
+rTmpPtr2 = r30
+rTmpPtr1 = r31
+
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+
+// scratch
+fXSqr = f6
+fXCube = f7
+fXQuadr = f9
+f1pX = f10
+f1mX = f11
+f1pXRcp = f12
+f1mXRcp = f13
+fH = f14
+fS = f15
+// stacked
+fA3 = f32
+fB1 = f32
+fA5 = f33
+fB2 = f33
+fA7 = f34
+fPiBy2 = f34
+fA9 = f35
+fA11 = f36
+fB10 = f35
+fB11 = f36
+fA13 = f37
+fA15 = f38
+fB4 = f37
+fB5 = f38
+fA17 = f39
+fA19 = f40
+fB6 = f39
+fB7 = f40
+fA21 = f41
+fA23 = f42
+fB3 = f41
+fB8 = f42
+fA25 = f43
+fA27 = f44
+fB9 = f43
+fB12 = f44
+fA29 = f45
+fA31 = f46
+fA33 = f47
+fA35 = f48
+fBaseP = f49
+fB0 = f50
+fSignedS = f51
+fD = f52
+fHalf = f53
+fR = f54
+fCloseTo1Pol = f55
+fSignX = f56
+fDenoBound = f57
+fNormX = f58
+fX8 = f59
+fRSqr = f60
+fRQuadr = f61
+fR8 = f62
+fX16 = f63
+fCpi = f64
+
+// Data tables
+//==============================================================
+RODATA
+.align 16
+LOCAL_OBJECT_START(acos_base_range_table)
+// Ai: Polynomial coefficients for the acos(x), |x| < .625000
+// Bi: Polynomial coefficients for the acos(x), |x| > .625000
+data8 0xBFDAAB56C01AE468 //A29
+data8 0x3FE1C470B76A5B2B //A31
+data8 0xBFDC5FF82A0C4205 //A33
+data8 0x3FC71FD88BFE93F0 //A35
+data8 0xB504F333F9DE6487, 0x00003FFF //B0
+data8 0xAAAAAAAAAAAAFC18, 0x00003FFC //A3
+data8 0x3F9F1C71BC4A7823 //A9
+data8 0x3F96E8BBAAB216B2 //A11
+data8 0x3F91C4CA1F9F8A98 //A13
+data8 0x3F8C9DDCEDEBE7A6 //A15
+data8 0x3F877784442B1516 //A17
+data8 0x3F859C0491802BA2 //A19
+data8 0x9999999998C88B8F, 0x00003FFB //A5
+data8 0x3F6BD7A9A660BF5E //A21
+data8 0x3F9FC1659340419D //A23
+data8 0xB6DB6DB798149BDF, 0x00003FFA //A7
+data8 0xBFB3EF18964D3ED3 //A25
+data8 0x3FCD285315542CF2 //A27
+data8 0xF15BEEEFF7D2966A, 0x00003FFB //B1
+data8 0x3EF0DDA376D10FB3 //B10
+data8 0xBEB83CAFE05EBAC9 //B11
+data8 0x3F65FFB67B513644 //B4
+data8 0x3F5032FBB86A4501 //B5
+data8 0x3F392162276C7CBA //B6
+data8 0x3F2435949FD98BDF //B7
+data8 0xD93923D7FA08341C, 0x00003FF9 //B2
+data8 0x3F802995B6D90BDB //B3
+data8 0x3F10DF86B341A63F //B8
+data8 0xC90FDAA22168C235, 0x00003FFF // Pi/2
+data8 0x3EFA3EBD6B0ECB9D //B9
+data8 0x3EDE18BA080E9098 //B12
+LOCAL_OBJECT_END(acos_base_range_table)
+
+.section .text
+GLOBAL_IEEE754_ENTRY(acos)
+acos_unnormal_back:
+{ .mfi
+ getf.d rXBits = f8 // grab bits of input value
+ // set p12 = 1 if x is a NaN, denormal, or zero
+ fclass.m p12, p0 = f8, 0xcf
+ adds rSign = 1, r0
+}
+{ .mfi
+ addl rTblAddr = @ltoff(acos_base_range_table),gp
+ // 1 - x = 1 - |x| for positive x
+ fms.s1 f1mX = f1, f1, f8
+ addl rHalf = 0xFFFE, r0 // exponent of 1/2
+}
+;;
+{ .mfi
+ addl r0625 = 0x3FE4, r0 // high 16 bits of 0.625
+ // set p8 = 1 if x < 0
+ fcmp.lt.s1 p8, p9 = f8, f0
+ shl rSign = rSign, 63 // sign bit
+}
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ // 1 + x = 1 - |x| for negative x
+ fma.s1 f1pX = f1, f1, f8
+ adds rOne = 0x3FF, r0
+}
+;;
+{ .mfi
+ andcm rAbsXBits = rXBits, rSign // bits of |x|
+ fmerge.s fSignX = f8, f1 // signum(x)
+ shl r0625 = r0625, 48 // bits of DP representation of 0.625
+}
+{ .mfb
+ setf.exp fHalf = rHalf // load 1/2 to FP reg
+ fma.s1 fXSqr = f8, f8, f0 // x^2
+ // branch on special path if x is a NaN, denormal, or zero
+(p12) br.cond.spnt acos_special
+}
+;;
+{ .mfi
+ adds rPiBy2Ptr = 272, rTblAddr
+ nop.f 0
+ shl rOne = rOne, 52 // bits of 1.0
+}
+{ .mfi
+ adds rTmpPtr1 = 16, rTblAddr
+ nop.f 0
+ // set p6 = 1 if |x| < 0.625
+ cmp.lt p6, p7 = rAbsXBits, r0625
+}
+;;
+{ .mfi
+ ldfpd fA29, fA31 = [rTblAddr] // A29, fA31
+ // 1 - x = 1 - |x| for positive x
+(p9) fms.s1 fR = f1, f1, f8
+ // point to coefficient of "near 1" polynomial
+(p7) adds rTmpPtr2 = 176, rTblAddr
+}
+{ .mfi
+ ldfpd fA33, fA35 = [rTmpPtr1], 16 // A33, fA35
+ // 1 + x = 1 - |x| for negative x
+(p8) fma.s1 fR = f1, f1, f8
+(p6) adds rTmpPtr2 = 48, rTblAddr
+}
+;;
+{ .mfi
+ ldfe fB0 = [rTmpPtr1], 16 // B0
+ nop.f 0
+ nop.i 0
+}
+{ .mib
+ adds rTmpPtr3 = 16, rTmpPtr2
+ // set p10 = 1 if |x| = 1.0
+ cmp.eq p10, p0 = rAbsXBits, rOne
+ // branch on special path for |x| = 1.0
+(p10) br.cond.spnt acos_abs_1
+}
+;;
+{ .mfi
+ ldfe fA3 = [rTmpPtr2], 48 // A3 or B1
+ nop.f 0
+ adds rTmpPtr1 = 64, rTmpPtr3
+}
+{ .mib
+ ldfpd fA9, fA11 = [rTmpPtr3], 16 // A9, A11 or B10, B11
+ // set p11 = 1 if |x| > 1.0
+ cmp.gt p11, p0 = rAbsXBits, rOne
+ // branch on special path for |x| > 1.0
+(p11) br.cond.spnt acos_abs_gt_1
+}
+;;
+{ .mfi
+ ldfpd fA17, fA19 = [rTmpPtr2], 16 // A17, A19 or B6, B7
+ // initial approximation of 1 / sqrt(1 - x)
+ frsqrta.s1 f1mXRcp, p0 = f1mX
+ nop.i 0
+}
+{ .mfi
+ ldfpd fA13, fA15 = [rTmpPtr3] // A13, A15 or B4, B5
+ fma.s1 fXCube = fXSqr, f8, f0 // x^3
+ nop.i 0
+}
+;;
+{ .mfi
+ ldfe fA5 = [rTmpPtr2], 48 // A5 or B2
+ // initial approximation of 1 / sqrt(1 + x)
+ frsqrta.s1 f1pXRcp, p0 = f1pX
+ nop.i 0
+}
+{ .mfi
+ ldfpd fA21, fA23 = [rTmpPtr1], 16 // A21, A23 or B3, B8
+ fma.s1 fXQuadr = fXSqr, fXSqr, f0 // x^4
+ nop.i 0
+}
+;;
+{ .mfi
+ ldfe fA7 = [rTmpPtr1] // A7 or Pi/2
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+ nop.i 0
+}
+{ .mfb
+ ldfpd fA25, fA27 = [rTmpPtr2] // A25, A27 or B9, B12
+ nop.f 0
+(p6) br.cond.spnt acos_base_range;
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fH = fHalf, f1mXRcp, f0 // H0 for x > 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fS = f1mX, f1mXRcp, f0 // S0 for x > 0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fH = fHalf, f1pXRcp, f0 // H0 for x < 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fS = f1pX, f1pXRcp, f0 // S0 for x < 0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fRQuadr = fRSqr, fRSqr, f0 // R^4
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB11 = fB11, fR, fB10
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB1 = fB1, fR, fB0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB5 = fB5, fR, fB4
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fR, fB6
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fR, fB2
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d0 = 1/2 - H0*S0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fR8 = fRQuadr, fRQuadr, f0 // R^8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB9 = fB9, fR, fB8
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB11
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRSqr, fB5
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fRSqr, fB1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H1 = H0 + H0*d0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S1 = S0 + S0*d0
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+(p9) fma.s1 fCpi = f1, f0, f0 // Cpi = 0 if x > 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fCpi = fPiBy2, f1, fPiBy2 // Cpi = Pi if x < 0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB9
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRQuadr, fB3
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d1 = 1/2 - H1*S1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fSignedS = fSignX, fS, f0 // -signum(x)*S1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fCloseTo1Pol = fB12, fR8, fB7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H2 = H1 + H1*d1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S2 = S1 + S1*d1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // -signum(x)* S2 = -signum(x)*(S1 + S1*d1)
+ fma.s1 fSignedS = fSignedS, fD, fSignedS
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d2 = 1/2 - H2*S2
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // Cpi + signum(x)*PolB*S2
+ fnma.s1 fCpi = fSignedS, fCloseTo1Pol, fCpi
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // signum(x)*PolB * S2
+ fnma.s1 fCloseTo1Pol = fSignedS, fCloseTo1Pol, f0
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for 0.625 <= |x| < 1
+ fma.d.s0 f8 = fCloseTo1Pol, fD, fCpi
+ // exit here for 0.625 <= |x| < 1
+ br.ret.sptk b0
+}
+;;
+
+
+// here if |x| < 0.625
+.align 32
+acos_base_range:
+{ .mfi
+ ldfe fCpi = [rPiBy2Ptr] // Pi/2
+ fma.s1 fA33 = fA33, fXSqr, fA31
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA15 = fA15, fXSqr, fA13
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA29 = fA29, fXSqr, fA27
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXSqr, fA23
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fA21, fXSqr, fA19
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXSqr, fA7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA5 = fA5, fXSqr, fA3
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA33
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXQuadr, fA15
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX8 = fXQuadr, fXQuadr, f0 // x^8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXQuadr, fA21
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXQuadr, fA5
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fms.s1 fCpi = fCpi, f1, f8 // Pi/2 - x
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA29
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXSqr, fA11
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX16 = fX8, fX8, f0 // x^16
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fX8, fA25
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fX8, fA9
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fBaseP = fA35, fX16, fA17
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for |x| < 0.625
+ fnma.d.s0 f8 = fBaseP, fXCube, fCpi
+ // exit here for |x| < 0.625 path
+ br.ret.sptk b0
+}
+;;
+
+// here if |x| = 1
+// acos(1) = 0
+// acos(-1) = Pi
+.align 32
+acos_abs_1:
+{ .mfi
+ ldfe fPiBy2 = [rPiBy2Ptr] // Pi/2
+ nop.f 0
+ nop.i 0
+}
+;;
+.pred.rel "mutex", p8, p9
+{ .mfi
+ nop.m 0
+ // result for x = 1.0
+(p9) fma.d.s0 f8 = f1, f0, f0 // 0.0
+ nop.i 0
+}
+{.mfb
+ nop.m 0
+ // result for x = -1.0
+(p8) fma.d.s0 f8 = fPiBy2, f1, fPiBy2 // Pi
+ // exit here for |x| = 1.0
+ br.ret.sptk b0
+}
+;;
+
+// here if x is a NaN, denormal, or zero
+.align 32
+acos_special:
+{ .mfi
+ // point to Pi/2
+ adds rPiBy2Ptr = 272, rTblAddr
+ // set p12 = 1 if x is a NaN
+ fclass.m p12, p0 = f8, 0xc3
+ nop.i 0
+}
+{ .mlx
+ nop.m 0
+ // smallest positive DP normalized number
+ movl rDenoBound = 0x0010000000000000
+}
+;;
+{ .mfi
+ ldfe fPiBy2 = [rPiBy2Ptr] // Pi/2
+ // set p13 = 1 if x = 0.0
+ fclass.m p13, p0 = f8, 0x07
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8
+ nop.i 0
+}
+;;
+{ .mfb
+ // load smallest normal to FP reg
+ setf.d fDenoBound = rDenoBound
+ // answer if x is a NaN
+(p12) fma.d.s0 f8 = f8,f1,f0
+ // exit here if x is a NaN
+(p12) br.ret.spnt b0
+}
+;;
+{ .mfi
+ nop.m 0
+ // absolute value of normalized x
+ fmerge.s fNormX = f1, fNormX
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for x = 0
+(p13) fma.d.s0 f8 = fPiBy2, f1, f8
+ // exit here if x = 0.0
+(p13) br.ret.spnt b0
+}
+;;
+// if we are still here then x is denormal or unnormal
+{ .mfi
+ nop.m 0
+ // set p14 = 1 if normalized x is greater than or
+ // equal to the smallest normalized value
+ // So, if p14 is set to 1 it means that we deal with
+ // unnormal rather than with "true" denormal
+ fcmp.ge.s1 p14, p0 = fNormX, fDenoBound
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+(p14) fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag if x unnormal
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // normalize unnormal input
+(p14) fnorm.s1 f8 = f8
+ // return to the main path
+(p14) br.cond.sptk acos_unnormal_back
+}
+;;
+// if we are still here it means that the input is a "true" denormal
+{ .mfb
+ nop.m 0
+ // final result if x is denormal
+ fms.d.s0 f8 = fPiBy2, f1, f8 // Pi/2 - x
+ // exit here if x is denormal
+ br.ret.sptk b0
+}
+;;
+
+// here if |x| > 1.0
+// error handler should be called
+.align 32
+acos_abs_gt_1:
+{ .mfi
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = f8,f8
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 58 // error code
+ frcpa.s0 FR_RESULT, p0 = f0,f0
+ // call error handler routine
+ br.cond.sptk __libm_error_region
+}
+;;
+GLOBAL_IEEE754_END(acos)
+libm_alias_double_other (__acos, acos)
+
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosf.S b/sysdeps/ia64/fpu/e_acosf.S
new file mode 100644
index 0000000000..56106f6b58
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acosf.S
@@ -0,0 +1,694 @@
+.file "acosf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 06/28/00 Improved speed
+// 06/31/00 Changed register allocation because of some duplicate macros
+// moved nan exit bundle up to gain a cycle.
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 08/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
+// 03/13/01 Corrected sign of imm1 value in dep instruction.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Moved mutex after label
+
+
+// Description
+//=========================================
+// The acosf function computes the principal value of the arc cosine of x.
+// A domain error occurs for arguments not in the range [-1,+1].
+
+// The acosf function returns the arc cosine in the range [0, +pi] radians.
+// acos(1) returns +0
+// acos(x) returns a Nan and raises the invalid exception for |x| >1
+
+// |x| <= sqrt(2)/2. get Ax and Bx
+
+// poly_p1 = x p1
+// poly_p3 = x2 p4 + p3
+// poly_p1 = x2 (poly_p1) + x = x2(x p1) + x
+// poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2
+
+// poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x
+// = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+
+// poly_p7 = x2 p8 + p7
+// poly_p5 = x2 p6 + p5
+
+// poly_p7 = x4 p9 + (x2 p8 + p7)
+// poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5
+
+// sinf1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+// = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x
+// answer1 = pi/2 - sinf1
+
+
+
+// |x| > sqrt(2)/2
+
+// Get z = sqrt(1-x2)
+
+// Get polynomial in t = 1-x2
+
+// t2 = t t
+// t4 = t2 t2
+
+// poly_p4 = t p5 + p4
+// poly_p1 = t p1 + 1
+
+// poly_p6 = t p7 + p6
+// poly_p2 = t p3 + p2
+
+// poly_p8 = t p9 + p8
+
+// poly_p4 = t2 poly_p6 + poly_p4
+// = t2 (t p7 + p6) + (t p5 + p4)
+
+// poly_p2 = t2 poly_p2 + poly_p1
+// = t2 (t p3 + p2) + (t p1 + 1)
+
+// poly_p4 = t4 poly_p8 + poly_p4
+// = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))
+
+// P(t) = poly_p2 + t4 poly_p4
+// = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)))
+// = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4
+
+
+// answer2 = sign(x) z P(t) if x>0
+// = sign(x) z P(t) + pi if x<0
+
+
+//
+// Assembly macros
+//=========================================
+
+// predicate registers
+//acosf_pred_LEsqrt2by2 = p7
+//acosf_pred_GTsqrt2by2 = p8
+
+// integer registers
+ACOSF_Addr1 = r33 // address of acosf_coeff_1_table
+ACOSF_Addr2 = r34 // address of acosf_coeff_2_table
+ACOSF_GR_1by2 = r35 // 0x3f000000, moved into acosf_1by2 with setf.s
+
+ACOSF_GR_3by2 = r36 // 0x3fc00000, moved into acosf_3by2 with setf.s
+ACOSF_GR_5by2 = r37 // 0x40200000, moved into acosf_5by2 with setf.s
+
+GR_SAVE_B0 = r38 // b0 saved by __libm_error_region
+GR_SAVE_PFS = r39 // ar.pfs saved by __libm_error_region
+GR_SAVE_GP = r40 // gp saved by __libm_error_region
+
+GR_Parameter_X = r41 // r41-r44 are the four output registers of the alloc in acosf
+GR_Parameter_Y = r42
+GR_Parameter_RESULT = r43
+GR_Parameter_TAG = r44 // set to 59 on the |x|>1 path
+
+// floating point registers
+
+acosf_y = f32
+acosf_abs_x = f33 // |x|
+acosf_x2 = f34 // NOTE(review): acosf_x2 is assigned again (f48) below; presumably the later value is the effective one - confirm
+acosf_sgn_x = f35 // 1.0 carrying the sign of x
+
+acosf_1by2 = f36
+acosf_3by2 = f37
+acosf_5by2 = f38
+acosf_coeff_P3 = f39
+acosf_coeff_P8 = f40
+
+acosf_coeff_P1 = f41
+acosf_coeff_P4 = f42
+acosf_coeff_P5 = f43
+acosf_coeff_P2 = f44
+acosf_coeff_P7 = f45
+
+acosf_coeff_P6 = f46
+acosf_coeff_P9 = f47
+acosf_x2 = f48 // x^2 (second assignment of this name, see f34 above)
+acosf_x3 = f49
+acosf_x4 = f50
+
+acosf_x8 = f51
+acosf_x5 = f52
+acosf_const_piby2 = f53
+acosf_const_sqrt2by2 = f54
+acosf_x11 = f55
+
+acosf_poly_p1 = f56
+acosf_poly_p3 = f57
+acosf_sinf1 = f58 // polynomial result for |x| <= sqrt(2)/2
+acosf_poly_p2 = f59
+acosf_poly_Ax = f60
+
+acosf_poly_p7 = f61
+acosf_poly_p5 = f62
+acosf_sgnx_t4 = f63
+acosf_poly_Bx = f64
+acosf_t = f65 // t = 1 - x^2
+
+acosf_yby2 = f66 // t * 0.5
+acosf_B = f67 // frsqrta result for t
+acosf_B2 = f68
+acosf_Az = f69
+acosf_dz = f70
+
+acosf_Sz = f71
+acosf_d2z = f72
+acosf_Fz = f73
+acosf_z = f74 // z = Az*Fz + Az, the sqrt(1-x2) of the header notes
+acosf_sgnx_z = f75
+
+acosf_t2 = f76
+acosf_2poly_p4 = f77
+acosf_2poly_p6 = f78
+acosf_2poly_p1 = f79
+acosf_2poly_p2 = f80
+
+acosf_2poly_p8 = f81
+acosf_t4 = f82
+acosf_Pt = f83 // sign(x) * P(t)
+acosf_sgnx_2poly_p2 = f84
+acosf_sgn_x_piby2 = f85 // pi/2 - sgn_x * pi/2
+
+acosf_poly_p7a = f86
+acosf_2poly_p4a = f87
+acosf_2poly_p4b = f88
+acosf_2poly_p2a = f89
+acosf_poly_p1a = f90
+
+
+
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(acosf_coeff_1_table)
+data8 0x3FC5555607DCF816 // P1 (loaded together with P4 by one ldfpd)
+data8 0x3F9CF81AD9BAB2C6 // P4
+data8 0x3FC59E0975074DF3 // P7 (loaded together with P6 by one ldfpd)
+data8 0xBFA6F4CC2780AA1D // P6
+data8 0x3FC2DD45292E93CB // P9 (loaded together with sqrt(2)/2 by one ldfpd)
+data8 0x3fe6a09e667f3bcd // sqrt(2)/2
+LOCAL_OBJECT_END(acosf_coeff_1_table)
+
+LOCAL_OBJECT_START(acosf_coeff_2_table)
+data8 0x3FA6F108E31EFBA6 // P3 (loaded together with P8 by one ldfpd)
+data8 0xBFCA31BF175D82A0 // P8
+data8 0x3FA30C0337F6418B // P5 (loaded together with P2 by one ldfpd)
+data8 0x3FB332C9266CB1F9 // P2
+data8 0x3ff921fb54442d18 // pi_by_2 (loaded alone with ldfd)
+LOCAL_OBJECT_END(acosf_coeff_2_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(acosf)
+
+// acosf: the argument x arrives in f8 and the result is returned in f8.
+// Load the addresses of the two tables, then the coefficients and constants.
+
+{ .mfi
+ alloc r32 = ar.pfs,1,8,4,0 // 1 input, 8 locals, 4 outputs, 0 rotating
+ fnma.s1 acosf_t = f8,f8,f1 // t = 1 - x*x
+ dep.z ACOSF_GR_1by2 = 0x3f,24,8 // 0x3f000000
+}
+{ .mfi
+ addl ACOSF_Addr1 = @ltoff(acosf_coeff_1_table),gp // linkage-table slot of table 1
+ fma.s1 acosf_x2 = f8,f8,f0 // x2 = x*x
+ addl ACOSF_Addr2 = @ltoff(acosf_coeff_2_table),gp ;;
+}
+
+
+{ .mfi
+ ld8 ACOSF_Addr1 = [ACOSF_Addr1] // fetch the address of table 1
+ fmerge.s acosf_abs_x = f1,f8 // |x|: sign of 1.0, magnitude of x
+ dep ACOSF_GR_3by2 = -1,r0,22,8 // 0x3fc00000
+}
+{ .mlx
+ nop.m 999
+ movl ACOSF_GR_5by2 = 0x40200000;;
+}
+
+
+
+{ .mfi
+ setf.s acosf_1by2 = ACOSF_GR_1by2 // single-precision bit pattern -> FP register
+ fmerge.s acosf_sgn_x = f8,f1 // sgn_x: sign of x, magnitude of 1.0
+ nop.i 999
+}
+{ .mfi
+ ld8 ACOSF_Addr2 = [ACOSF_Addr2] // fetch the address of table 2
+ nop.f 0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ setf.s acosf_5by2 = ACOSF_GR_5by2
+ fcmp.lt.s1 p11,p12 = f8,f0 // p11: x < 0, p12: otherwise (consumed at ACOSF_ABS_ONE)
+ nop.i 999;;
+}
+
+{ .mmf
+ ldfpd acosf_coeff_P1,acosf_coeff_P4 = [ACOSF_Addr1],16
+ setf.s acosf_3by2 = ACOSF_GR_3by2
+ fclass.m.unc p8,p0 = f8, 0xc3 ;; //@qnan | @snan
+}
+
+
+{ .mfi
+ ldfpd acosf_coeff_P7,acosf_coeff_P6 = [ACOSF_Addr1],16
+ fma.s1 acosf_t2 = acosf_t,acosf_t,f0 // t2 = t*t
+ nop.i 999
+}
+{ .mfi
+ ldfpd acosf_coeff_P3,acosf_coeff_P8 = [ACOSF_Addr2],16
+ fma.s1 acosf_x4 = acosf_x2,acosf_x2,f0 // x4 = x2*x2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd acosf_coeff_P9,acosf_const_sqrt2by2 = [ACOSF_Addr1]
+ fclass.m.unc p10,p0 = f8, 0x07 //@zero
+ nop.i 999
+}
+{ .mfi
+ ldfpd acosf_coeff_P5,acosf_coeff_P2 = [ACOSF_Addr2],16
+ fma.s1 acosf_x3 = f8,acosf_x2,f0 // x3 = x*x2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfd acosf_const_piby2 = [ACOSF_Addr2]
+ frsqrta.s1 acosf_B,p0 = acosf_t // B = reciprocal square root approximation of t
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p8) fma.s.s0 f8 = f8,f1,f0 // NaN input: result = x*1 + 0 in single precision
+(p8) br.ret.spnt b0 ;; // Exit if x=nan
+}
+
+
+{ .mfb
+ nop.m 999
+ fcmp.eq.s1 p6,p0 = acosf_abs_x,f1 // p6: |x| == 1
+(p10) br.cond.spnt ACOSF_ZERO ;; // Branch if x=0
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.gt.s1 p9,p0 = acosf_abs_x,f1 // p9: |x| > 1 (domain error)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_x8 = acosf_x4,acosf_x4,f0 // x8 = x4*x4
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 acosf_t4 = acosf_t2,acosf_t2,f0 // t4 = t2*t2
+(p6) br.cond.spnt ACOSF_ABS_ONE ;; // Branch if |x|=1
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_x5 = acosf_x2,acosf_x3,f0 // x5 = x2*x3
+ nop.i 999
+}
+{ .mfb
+(p9) mov GR_Parameter_TAG = 59 // error tag placed in the 4th output register
+ fma.s1 acosf_yby2 = acosf_t,acosf_1by2,f0 // yby2 = t * 0.5
+(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_Az = acosf_t,acosf_B,f0 // Az = t*B, first estimate of sqrt(t)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_B2 = acosf_B,acosf_B,f0 // B2 = B*B
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p1 = f8,acosf_coeff_P1,f0 // poly_p1 = x*P1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p1 = acosf_coeff_P1,acosf_t,f1 // 2poly_p1 = P1*t + 1
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p3 = acosf_coeff_P4,acosf_x2,acosf_coeff_P3 // poly_p3 = P4*x2 + P3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p6 = acosf_coeff_P7,acosf_t,acosf_coeff_P6 // 2poly_p6 = P7*t + P6
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p7 = acosf_x2,acosf_coeff_P8,acosf_coeff_P7 // poly_p7 = x2*P8 + P7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p2 = acosf_coeff_P3,acosf_t,acosf_coeff_P2 // 2poly_p2 = P3*t + P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p5 = acosf_x2,acosf_coeff_P6,acosf_coeff_P5 // poly_p5 = x2*P6 + P5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p4 = acosf_coeff_P5,acosf_t,acosf_coeff_P4 // 2poly_p4 = P5*t + P4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_x11 = acosf_x8,acosf_x3,f0 // x11 = x8*x3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 acosf_dz = acosf_B2,acosf_yby2,acosf_1by2 // dz = 0.5 - B2*yby2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p1a = acosf_x2,acosf_poly_p1,f8 // poly_p1a = x2*poly_p1 + x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p8 = acosf_coeff_P9,acosf_t,acosf_coeff_P8 // 2poly_p8 = P9*t + P8
+ nop.i 999;;
+}
+
+
+// Select the region: p7 when |x| <= sqrt(2)/2, p8 when |x| > sqrt(2)/2
+
+{ .mfi
+ nop.m 999
+ fcmp.le.s1 p7,p8 = acosf_abs_x,acosf_const_sqrt2by2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p2 = acosf_x2,acosf_poly_p3,acosf_coeff_P2 // poly_p2 = x2*poly_p3 + P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_poly_p7a = acosf_x4,acosf_coeff_P9,acosf_poly_p7 // poly_p7a = x4*P9 + poly_p7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 acosf_2poly_p2a = acosf_2poly_p2,acosf_t2,acosf_2poly_p1 // 2poly_p2a = 2poly_p2*t2 + 2poly_p1
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_sgnx_t4 = acosf_sgn_x,acosf_t4,f0 // sgnx_t4 = sgn_x*t4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_2poly_p4a = acosf_2poly_p6,acosf_t2,acosf_2poly_p4 // 2poly_p4a = 2poly_p6*t2 + 2poly_p4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_Sz = acosf_5by2,acosf_dz,acosf_3by2 // Sz = 2.5*dz + 1.5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_d2z = acosf_dz,acosf_dz,f0 // d2z = dz*dz
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fnma.d.s1 acosf_sgn_x_piby2 = acosf_sgn_x,acosf_const_piby2,acosf_const_piby2 // pi/2 - sgn_x*pi/2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 acosf_poly_Ax = acosf_x5,acosf_poly_p2,acosf_poly_p1a // poly_Ax = x5*poly_p2 + poly_p1a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 acosf_poly_Bx = acosf_x4,acosf_poly_p7a,acosf_poly_p5 // poly_Bx = x4*poly_p7a + poly_p5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_sgnx_2poly_p2 = acosf_sgn_x,acosf_2poly_p2a,f0 // sgnx_2poly_p2 = sgn_x*2poly_p2a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_2poly_p4b = acosf_2poly_p8,acosf_t4,acosf_2poly_p4a // 2poly_p4b = 2poly_p8*t4 + 2poly_p4a
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 acosf_Fz = acosf_d2z,acosf_Sz,acosf_dz // Fz = d2z*Sz + dz
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 acosf_Pt = acosf_2poly_p4b,acosf_sgnx_t4,acosf_sgnx_2poly_p2 // Pt = 2poly_p4b*sgnx_t4 + sgnx_2poly_p2
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 acosf_z = acosf_Az,acosf_Fz,acosf_Az // z = Az*Fz + Az
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.d.s1 acosf_sinf1 = acosf_x11,acosf_poly_Bx,acosf_poly_Ax // sinf1 = x11*poly_Bx + poly_Ax
+ nop.i 999;;
+}
+
+.pred.rel "mutex",p8,p7 //acosf_pred_GTsqrt2by2,acosf_pred_LEsqrt2by2
+{ .mfi
+ nop.m 999
+(p8) fma.s.s0 f8 = acosf_z,acosf_Pt,acosf_sgn_x_piby2 // |x| > sqrt(2)/2: z*Pt + (pi/2 - sgn_x*pi/2)
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fms.s.s0 f8 = acosf_const_piby2,f1,acosf_sinf1 // |x| <= sqrt(2)/2: pi/2 - sinf1
+ br.ret.sptk b0 ;;
+}
+
+ACOSF_ZERO:
+// Here if x=0
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = acosf_const_piby2,f1,f0 // acosf(0)=pi/2
+ br.ret.sptk b0 ;;
+}
+
+
+ACOSF_ABS_ONE:
+.pred.rel "mutex",p11,p12
+// Here if |x|=1; p11/p12 were set by the x < 0 compare near the entry
+{ .mfi
+ nop.m 999
+(p11) fma.s.s0 f8 = acosf_const_piby2,f1,acosf_const_piby2 // acosf(-1)=pi
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p12) fma.s.s0 f8 = f1,f0,f0 // acosf(1)=0
+ br.ret.sptk b0 ;;
+}
+
+GLOBAL_IEEE754_END(acosf)
+libm_alias_float_other (__acos, acos)
+
+
+// Stack operations when calling error support.
+// (1) (2)
+// sp -> + psp -> +
+// | |
+// | | <- GR_Y
+// | |
+// | <-GR_Y Y2->|
+// | |
+// | | <- GR_X
+// | |
+// sp-64 -> + sp -> +
+// save ar.pfs save b0
+// save gp
+
+
+// Stack operations when calling error support.
+// (3) (call) (4)
+// psp -> + sp -> +
+// | |
+// R3 ->| <- GR_RESULT | -> f8
+// | |
+// Y2 ->| <- GR_Y |
+// | |
+// X1 ->| |
+// | |
+// sp -> + +
+// restore gp
+// restore ar.pfs
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 slot = entry sp - 32 (new sp + 32)
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs (restored before returning)
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp (restored after the call)
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // Store 1.0 as Parameter 2 at sp+32; pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address = sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0 (br.call below overwrites it)
+};;
+
+.body
+{ .mfi
+ nop.m 0
+ frcpa.s0 f9,p0 = f0,f0 // f9 = frcpa(0/0): the default result for |x|>1 (header: NaN + invalid)
+ nop.i 0
+};;
+
+{ .mib
+ stfs [GR_Parameter_X] = f8 // Store Parameter 1 (the argument x) at sp+16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address = sp+48
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f9 // Store Parameter 3 (default result) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point Parameter 2 back at sp+32
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address (sp+48) after the call
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack into f8
+.restore sp
+ add sp = 64,sp // Release the 64-byte frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return with the float loaded from sp+48 in f8
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosh.S b/sysdeps/ia64/fpu/e_acosh.S
new file mode 100644
index 0000000000..307b4976be
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acosh.S
@@ -0,0 +1,1202 @@
+.file "acosh.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// ==============================================================
+// History
+// ==============================================================
+// 03/23/01 Initial version
+// 04/19/01 Improved speed of the paths #1,2,3,4,5
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 05/14/03 Improved performance, set denormal flag for unorms >= 1.0
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+// ==============================================================
+// double acosh(double)
+//
+// Overview of operation
+// ==============================================================
+//
+// There are 7 paths:
+// 1. x = 1.0
+// Return acosh(x) = 0.0
+// 2. 1.0 < x < 1.000499725341796875(0x3FF0020C00000000)
+// Return acosh(x) = sqrt(x-1) * Pol4(x), where Pol4(x) =
+// (((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0
+
+// 3. 1.000499725341796875(0x3FF0020C00000000) <= x < 2^63
+// Return acosh(x) = log(x + sqrt(x^2 -1.0))
+// To compute x + sqrt(x^2 -1.0) modified Newton Raphson method is used
+// (3 iterations)
+// Algorithm description for log function see below.
+//
+// 4. 2^63 <= x < +INF
+// Return acosh(x) = log(2*x)
+// Algorithm description for log function see below.
+//
+// 5. x = +INF
+// Return acosh(x) = +INF
+//
+// 6. x = [S,Q]NaN
+// Return acosh(x) = QNaN
+//
+// 7. x < 1.0
+// It's domain error. Error handler with tag = 136 is called
+//
+//==============================================================
+// Algorithm Description for log(x) function
+// Below we are using the fact that inequality x - 1.0 > 2^(-6) is always
+// true for this acosh implementation
+//
+// Consider x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(frcpa(x) x/frcpa(x))
+// = log(1/frcpa(x)) + log(frcpa(x) x)
+// = -log(frcpa(x)) + log(frcpa(x) x)
+//
+// frcpa(x) = 2^-N frcpa(1.f1 f2 ... f63)
+//
+// -log(frcpa(x)) = -log(C)
+// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
+//
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+//
+// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 + T + log(frcpa(x) x)
+//
+// Log(x) = +Nlog2 + T + log(C x)
+//
+// Cx = 1 + r
+//
+// Log(x) = +Nlog2 + T + log(1+r)
+// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
+//
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
+//
+// Implementation
+//==============================================================
+// C = frcpa(x)
+// r = C * x - 1
+//
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
+//
+// x = f * 2^n where f is 1.f_1f_2f_3....f_63
+// Nfloat = float(n) where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 16
+// get the dxt table entry at index + offset = T
+//
+// result = (T + Nfloat * log(2)) + rseries
+//
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+// y_k = frcpa(x_k)
+// log(1/y_k) in quad and round to double-extended
+//
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f9 -> f15, f32 -> f65
+
+// General registers used:
+// r14 -> r27, r32 -> r39
+
+// Predicate registers used:
+// p6 -> p15
+
+// p6 to filter out case when x = [Q,S]NaN
+// p7,p8 to filter out case when x < 1.0
+// p10 to select path #1
+// p11 to filter out case when x = +INF
+// p12 used in the frcpa
+// p13 to select path #4
+// p14,p15 to select path #2
+
+// Assembly macros
+//==============================================================
+log_GR_exp_17_ones = r14 // general-register aliases; header lists r14 -> r27 and r32 -> r39 as used
+log_GR_signexp_f8 = r15
+log_table_address2 = r16 // presumably the address of log_table_2 - confirm at the use site
+log_GR_exp_16_ones = r17
+log_GR_exp_f8 = r18
+log_GR_true_exp_f8 = r19
+log_GR_significand_f8 = r20
+log_GR_index = r21
+log_GR_comp2 = r22
+acosh_GR_f8 = r23
+log_GR_comp = r24
+acosh_GR_f8_sig = r25
+log_table_address3 = r26 // presumably the address of log_table_3 - confirm at the use site
+NR_table_address = r27
+
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_SAVE_PFS = r35
+
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+acosh_GR_tag = r39 // header: the x < 1.0 domain error uses tag 136
+
+//==============================================================
+log_y = f9 // floating-point aliases; header lists f9 -> f15 and f32 -> f65 as used
+NR1 = f10
+NR2 = f11
+log_y_rs = f12
+log_y_rs_iter = f13
+log_y_rs_iter1 = f14
+log_NORM_f8 = f15
+acosh_comp = f32
+log_w = f34 // note: f33 has no alias in this list
+log_P5 = f35 // P1..P5: coefficients of the rseries polynomial in r
+log_P4 = f36
+log_P3 = f37
+log_P2 = f38
+log_P1 = f39
+log_C0 = f40 // C0..C4: coefficients of Pol4 (path #2)
+log_C1 = f41
+log_C2 = f42
+log2 = f43
+acosh_w_rs = f44
+log_C = f45
+log_arg = f46
+acosh_w_iter1 = f47
+acosh_w_iter2 = f48
+log_int_Nfloat = f49
+log_r = f50
+log_rsq = f51
+log_rp_p4 = f52
+log_rp_p32 = f53
+log_rcube = f54
+log_rp_p10 = f55
+log_rp_p2 = f56
+log_Nfloat = f57
+log_T = f58
+log_r2P_r = f59
+log_T_plus_Nlog2 = f60
+acosh_w_sqrt = f61
+acosh_w_1 = f62
+log_C3 = f63
+log_C4 = f64
+log_arg_early = f65
+
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(log_table_1)
+data8 0x3FF0020C49BA5E35 // 1.0005 - NOTE(review): header cites 0x3FF0020C00000000 as the path #2/#3 boundary; confirm which is intended
+data8 0xBFC5555DA7212371 // P5
+data8 0x3FC999A19EEF5826 // P4
+data8 0xBFCFFFFFFFFEF009 // P3
+data8 0x3FD555555554ECB2 // P2
+data8 0xBFE0000000000000 // P1 = -0.5
+// The entries below are two-word values: significand, then sign/exponent
+data8 0xb17217f7d1cf79ac, 0x00003ffe // log2
+LOCAL_OBJECT_END(log_table_1)
+
+LOCAL_OBJECT_START(log_table_2)
+data8 0x3FE0000000000000 // 0.5
+data8 0x4008000000000000 // 3.0
+// Pol4 coefficients (path #2), stored from C4 down to C0
+data8 0xAFE8F9203939CCF8, 0x00003FF6 // C4 3FF6AFE8F9203939CCF8
+data8 0xAD46EB6AE752D809, 0x0000BFF8 // C3 BFF8AD46EB6AE752D809
+data8 0xD93923D7F53F3627, 0x00003FF9 // C2 3FF9D93923D7F53F3627
+data8 0xF15BEEEFF7D32D36, 0x0000BFFB // C1 BFFBF15BEEEFF7D32D36
+data8 0xB504F333F9DE6484, 0x00003FFF // C0 3FFFB504F333F9DE6484
+LOCAL_OBJECT_END(log_table_2)
+
+
+LOCAL_OBJECT_START(log_table_3)
+data8 0x80200aaeac44ef38 , 0x00003ff6 // log(1/frcpa(1+ 0/2^-8))
+//
+data8 0xc09090a2c35aa070 , 0x00003ff7 // log(1/frcpa(1+ 1/2^-8))
+data8 0xa0c94fcb41977c75 , 0x00003ff8 // log(1/frcpa(1+ 2/2^-8))
+data8 0xe18b9c263af83301 , 0x00003ff8 // log(1/frcpa(1+ 3/2^-8))
+data8 0x8d35c8d6399c30ea , 0x00003ff9 // log(1/frcpa(1+ 4/2^-8))
+data8 0xadd4d2ecd601cbb8 , 0x00003ff9 // log(1/frcpa(1+ 5/2^-8))
+//
+data8 0xce95403a192f9f01 , 0x00003ff9 // log(1/frcpa(1+ 6/2^-8))
+data8 0xeb59392cbcc01096 , 0x00003ff9 // log(1/frcpa(1+ 7/2^-8))
+data8 0x862c7d0cefd54c5d , 0x00003ffa // log(1/frcpa(1+ 8/2^-8))
+data8 0x94aa63c65e70d499 , 0x00003ffa // log(1/frcpa(1+ 9/2^-8))
+data8 0xa54a696d4b62b382 , 0x00003ffa // log(1/frcpa(1+ 10/2^-8))
+//
+data8 0xb3e4a796a5dac208 , 0x00003ffa // log(1/frcpa(1+ 11/2^-8))
+data8 0xc28c45b1878340a9 , 0x00003ffa // log(1/frcpa(1+ 12/2^-8))
+data8 0xd35c55f39d7a6235 , 0x00003ffa // log(1/frcpa(1+ 13/2^-8))
+data8 0xe220f037b954f1f5 , 0x00003ffa // log(1/frcpa(1+ 14/2^-8))
+data8 0xf0f3389b036834f3 , 0x00003ffa // log(1/frcpa(1+ 15/2^-8))
+//
+data8 0xffd3488d5c980465 , 0x00003ffa // log(1/frcpa(1+ 16/2^-8))
+data8 0x87609ce2ed300490 , 0x00003ffb // log(1/frcpa(1+ 17/2^-8))
+data8 0x8ede9321e8c85927 , 0x00003ffb // log(1/frcpa(1+ 18/2^-8))
+data8 0x96639427f2f8e2f4 , 0x00003ffb // log(1/frcpa(1+ 19/2^-8))
+data8 0x9defad3e8f73217b , 0x00003ffb // log(1/frcpa(1+ 20/2^-8))
+//
+data8 0xa582ebd50097029c , 0x00003ffb // log(1/frcpa(1+ 21/2^-8))
+data8 0xac06dbe75ab80fee , 0x00003ffb // log(1/frcpa(1+ 22/2^-8))
+data8 0xb3a78449b2d3ccca , 0x00003ffb // log(1/frcpa(1+ 23/2^-8))
+data8 0xbb4f79635ab46bb2 , 0x00003ffb // log(1/frcpa(1+ 24/2^-8))
+data8 0xc2fec93a83523f3f , 0x00003ffb // log(1/frcpa(1+ 25/2^-8))
+//
+data8 0xc99af2eaca4c4571 , 0x00003ffb // log(1/frcpa(1+ 26/2^-8))
+data8 0xd1581106472fa653 , 0x00003ffb // log(1/frcpa(1+ 27/2^-8))
+data8 0xd8002560d4355f2e , 0x00003ffb // log(1/frcpa(1+ 28/2^-8))
+data8 0xdfcb43b4fe508632 , 0x00003ffb // log(1/frcpa(1+ 29/2^-8))
+data8 0xe67f6dff709d4119 , 0x00003ffb // log(1/frcpa(1+ 30/2^-8))
+//
+data8 0xed393b1c22351280 , 0x00003ffb // log(1/frcpa(1+ 31/2^-8))
+data8 0xf5192bff087bcc35 , 0x00003ffb // log(1/frcpa(1+ 32/2^-8))
+data8 0xfbdf4ff6dfef2fa3 , 0x00003ffb // log(1/frcpa(1+ 33/2^-8))
+data8 0x81559a97f92f9cc7 , 0x00003ffc // log(1/frcpa(1+ 34/2^-8))
+data8 0x84be72bce90266e8 , 0x00003ffc // log(1/frcpa(1+ 35/2^-8))
+//
+data8 0x88bc74113f23def2 , 0x00003ffc // log(1/frcpa(1+ 36/2^-8))
+data8 0x8c2ba3edf6799d11 , 0x00003ffc // log(1/frcpa(1+ 37/2^-8))
+data8 0x8f9dc92f92ea08b1 , 0x00003ffc // log(1/frcpa(1+ 38/2^-8))
+data8 0x9312e8f36efab5a7 , 0x00003ffc // log(1/frcpa(1+ 39/2^-8))
+data8 0x968b08643409ceb6 , 0x00003ffc // log(1/frcpa(1+ 40/2^-8))
+//
+data8 0x9a062cba08a1708c , 0x00003ffc // log(1/frcpa(1+ 41/2^-8))
+data8 0x9d845b3abf95485c , 0x00003ffc // log(1/frcpa(1+ 42/2^-8))
+data8 0xa06fd841bc001bb4 , 0x00003ffc // log(1/frcpa(1+ 43/2^-8))
+data8 0xa3f3a74652fbe0db , 0x00003ffc // log(1/frcpa(1+ 44/2^-8))
+data8 0xa77a8fb2336f20f5 , 0x00003ffc // log(1/frcpa(1+ 45/2^-8))
+//
+data8 0xab0497015d28b0a0 , 0x00003ffc // log(1/frcpa(1+ 46/2^-8))
+data8 0xae91c2be6ba6a615 , 0x00003ffc // log(1/frcpa(1+ 47/2^-8))
+data8 0xb189d1b99aebb20b , 0x00003ffc // log(1/frcpa(1+ 48/2^-8))
+data8 0xb51cced5de9c1b2c , 0x00003ffc // log(1/frcpa(1+ 49/2^-8))
+data8 0xb819bee9e720d42f , 0x00003ffc // log(1/frcpa(1+ 50/2^-8))
+//
+data8 0xbbb2a0947b093a5d , 0x00003ffc // log(1/frcpa(1+ 51/2^-8))
+data8 0xbf4ec1505811684a , 0x00003ffc // log(1/frcpa(1+ 52/2^-8))
+data8 0xc2535bacfa8975ff , 0x00003ffc // log(1/frcpa(1+ 53/2^-8))
+data8 0xc55a3eafad187eb8 , 0x00003ffc // log(1/frcpa(1+ 54/2^-8))
+data8 0xc8ff2484b2c0da74 , 0x00003ffc // log(1/frcpa(1+ 55/2^-8))
+//
+data8 0xcc0b1a008d53ab76 , 0x00003ffc // log(1/frcpa(1+ 56/2^-8))
+data8 0xcfb6203844b3209b , 0x00003ffc // log(1/frcpa(1+ 57/2^-8))
+data8 0xd2c73949a47a19f5 , 0x00003ffc // log(1/frcpa(1+ 58/2^-8))
+data8 0xd5daae18b49d6695 , 0x00003ffc // log(1/frcpa(1+ 59/2^-8))
+data8 0xd8f08248cf7e8019 , 0x00003ffc // log(1/frcpa(1+ 60/2^-8))
+//
+data8 0xdca7749f1b3e540e , 0x00003ffc // log(1/frcpa(1+ 61/2^-8))
+data8 0xdfc28e033aaaf7c7 , 0x00003ffc // log(1/frcpa(1+ 62/2^-8))
+data8 0xe2e012a5f91d2f55 , 0x00003ffc // log(1/frcpa(1+ 63/2^-8))
+data8 0xe600064ed9e292a8 , 0x00003ffc // log(1/frcpa(1+ 64/2^-8))
+data8 0xe9226cce42b39f60 , 0x00003ffc // log(1/frcpa(1+ 65/2^-8))
+//
+data8 0xec4749fd97a28360 , 0x00003ffc // log(1/frcpa(1+ 66/2^-8))
+data8 0xef6ea1bf57780495 , 0x00003ffc // log(1/frcpa(1+ 67/2^-8))
+data8 0xf29877ff38809091 , 0x00003ffc // log(1/frcpa(1+ 68/2^-8))
+data8 0xf5c4d0b245cb89be , 0x00003ffc // log(1/frcpa(1+ 69/2^-8))
+data8 0xf8f3afd6fcdef3aa , 0x00003ffc // log(1/frcpa(1+ 70/2^-8))
+//
+data8 0xfc2519756be1abc7 , 0x00003ffc // log(1/frcpa(1+ 71/2^-8))
+data8 0xff59119f503e6832 , 0x00003ffc // log(1/frcpa(1+ 72/2^-8))
+data8 0x8147ce381ae0e146 , 0x00003ffd // log(1/frcpa(1+ 73/2^-8))
+data8 0x82e45f06cb1ad0f2 , 0x00003ffd // log(1/frcpa(1+ 74/2^-8))
+data8 0x842f5c7c573cbaa2 , 0x00003ffd // log(1/frcpa(1+ 75/2^-8))
+//
+data8 0x85ce471968c8893a , 0x00003ffd // log(1/frcpa(1+ 76/2^-8))
+data8 0x876e8305bc04066d , 0x00003ffd // log(1/frcpa(1+ 77/2^-8))
+data8 0x891012678031fbb3 , 0x00003ffd // log(1/frcpa(1+ 78/2^-8))
+data8 0x8a5f1493d766a05f , 0x00003ffd // log(1/frcpa(1+ 79/2^-8))
+data8 0x8c030c778c56fa00 , 0x00003ffd // log(1/frcpa(1+ 80/2^-8))
+//
+data8 0x8da85df17e31d9ae , 0x00003ffd // log(1/frcpa(1+ 81/2^-8))
+data8 0x8efa663e7921687e , 0x00003ffd // log(1/frcpa(1+ 82/2^-8))
+data8 0x90a22b6875c6a1f8 , 0x00003ffd // log(1/frcpa(1+ 83/2^-8))
+data8 0x91f62cc8f5d24837 , 0x00003ffd // log(1/frcpa(1+ 84/2^-8))
+data8 0x93a06cfc3857d980 , 0x00003ffd // log(1/frcpa(1+ 85/2^-8))
+//
+data8 0x94f66d5e6fd01ced , 0x00003ffd // log(1/frcpa(1+ 86/2^-8))
+data8 0x96a330156e6772f2 , 0x00003ffd // log(1/frcpa(1+ 87/2^-8))
+data8 0x97fb3582754ea25b , 0x00003ffd // log(1/frcpa(1+ 88/2^-8))
+data8 0x99aa8259aad1bbf2 , 0x00003ffd // log(1/frcpa(1+ 89/2^-8))
+data8 0x9b0492f6227ae4a8 , 0x00003ffd // log(1/frcpa(1+ 90/2^-8))
+//
+data8 0x9c5f8e199bf3a7a5 , 0x00003ffd // log(1/frcpa(1+ 91/2^-8))
+data8 0x9e1293b9998c1daa , 0x00003ffd // log(1/frcpa(1+ 92/2^-8))
+data8 0x9f6fa31e0b41f308 , 0x00003ffd // log(1/frcpa(1+ 93/2^-8))
+data8 0xa0cda11eaf46390e , 0x00003ffd // log(1/frcpa(1+ 94/2^-8))
+data8 0xa22c8f029cfa45aa , 0x00003ffd // log(1/frcpa(1+ 95/2^-8))
+//
+data8 0xa3e48badb7856b34 , 0x00003ffd // log(1/frcpa(1+ 96/2^-8))
+data8 0xa5459a0aa95849f9 , 0x00003ffd // log(1/frcpa(1+ 97/2^-8))
+data8 0xa6a79c84480cfebd , 0x00003ffd // log(1/frcpa(1+ 98/2^-8))
+data8 0xa80a946d0fcb3eb2 , 0x00003ffd // log(1/frcpa(1+ 99/2^-8))
+data8 0xa96e831a3ea7b314 , 0x00003ffd // log(1/frcpa(1+100/2^-8))
+//
+data8 0xaad369e3dc544e3b , 0x00003ffd // log(1/frcpa(1+101/2^-8))
+data8 0xac92e9588952c815 , 0x00003ffd // log(1/frcpa(1+102/2^-8))
+data8 0xadfa035aa1ed8fdc , 0x00003ffd // log(1/frcpa(1+103/2^-8))
+data8 0xaf6219eae1ad6e34 , 0x00003ffd // log(1/frcpa(1+104/2^-8))
+data8 0xb0cb2e6d8160f753 , 0x00003ffd // log(1/frcpa(1+105/2^-8))
+//
+data8 0xb2354249ad950f72 , 0x00003ffd // log(1/frcpa(1+106/2^-8))
+data8 0xb3a056e98ef4a3b4 , 0x00003ffd // log(1/frcpa(1+107/2^-8))
+data8 0xb50c6dba52c6292a , 0x00003ffd // log(1/frcpa(1+108/2^-8))
+data8 0xb679882c33876165 , 0x00003ffd // log(1/frcpa(1+109/2^-8))
+data8 0xb78c07429785cedc , 0x00003ffd // log(1/frcpa(1+110/2^-8))
+//
+data8 0xb8faeb8dc4a77d24 , 0x00003ffd // log(1/frcpa(1+111/2^-8))
+data8 0xba6ad77eb36ae0d6 , 0x00003ffd // log(1/frcpa(1+112/2^-8))
+data8 0xbbdbcc915e9bee50 , 0x00003ffd // log(1/frcpa(1+113/2^-8))
+data8 0xbd4dcc44f8cf12ef , 0x00003ffd // log(1/frcpa(1+114/2^-8))
+data8 0xbec0d81bf5b531fa , 0x00003ffd // log(1/frcpa(1+115/2^-8))
+//
+data8 0xc034f19c139186f4 , 0x00003ffd // log(1/frcpa(1+116/2^-8))
+data8 0xc14cb69f7c5e55ab , 0x00003ffd // log(1/frcpa(1+117/2^-8))
+data8 0xc2c2abbb6e5fd56f , 0x00003ffd // log(1/frcpa(1+118/2^-8))
+data8 0xc439b2c193e6771e , 0x00003ffd // log(1/frcpa(1+119/2^-8))
+data8 0xc553acb9d5c67733 , 0x00003ffd // log(1/frcpa(1+120/2^-8))
+//
+data8 0xc6cc96e441272441 , 0x00003ffd // log(1/frcpa(1+121/2^-8))
+data8 0xc8469753eca88c30 , 0x00003ffd // log(1/frcpa(1+122/2^-8))
+data8 0xc962cf3ce072b05c , 0x00003ffd // log(1/frcpa(1+123/2^-8))
+data8 0xcadeba8771f694aa , 0x00003ffd // log(1/frcpa(1+124/2^-8))
+data8 0xcc5bc08d1f72da94 , 0x00003ffd // log(1/frcpa(1+125/2^-8))
+//
+data8 0xcd7a3f99ea035c29 , 0x00003ffd // log(1/frcpa(1+126/2^-8))
+data8 0xcef93860c8a53c35 , 0x00003ffd // log(1/frcpa(1+127/2^-8))
+data8 0xd0192f68a7ed23df , 0x00003ffd // log(1/frcpa(1+128/2^-8))
+data8 0xd19a201127d3c645 , 0x00003ffd // log(1/frcpa(1+129/2^-8))
+data8 0xd2bb92f4061c172c , 0x00003ffd // log(1/frcpa(1+130/2^-8))
+//
+data8 0xd43e80b2ee8cc8fc , 0x00003ffd // log(1/frcpa(1+131/2^-8))
+data8 0xd56173601fc4ade4 , 0x00003ffd // log(1/frcpa(1+132/2^-8))
+data8 0xd6e6637efb54086f , 0x00003ffd // log(1/frcpa(1+133/2^-8))
+data8 0xd80ad9f58f3c8193 , 0x00003ffd // log(1/frcpa(1+134/2^-8))
+data8 0xd991d1d31aca41f8 , 0x00003ffd // log(1/frcpa(1+135/2^-8))
+//
+data8 0xdab7d02231484a93 , 0x00003ffd // log(1/frcpa(1+136/2^-8))
+data8 0xdc40d532cde49a54 , 0x00003ffd // log(1/frcpa(1+137/2^-8))
+data8 0xdd685f79ed8b265e , 0x00003ffd // log(1/frcpa(1+138/2^-8))
+data8 0xde9094bbc0e17b1d , 0x00003ffd // log(1/frcpa(1+139/2^-8))
+data8 0xe01c91b78440c425 , 0x00003ffd // log(1/frcpa(1+140/2^-8))
+//
+data8 0xe14658f26997e729 , 0x00003ffd // log(1/frcpa(1+141/2^-8))
+data8 0xe270cdc2391e0d23 , 0x00003ffd // log(1/frcpa(1+142/2^-8))
+data8 0xe3ffce3a2aa64922 , 0x00003ffd // log(1/frcpa(1+143/2^-8))
+data8 0xe52bdb274ed82887 , 0x00003ffd // log(1/frcpa(1+144/2^-8))
+data8 0xe6589852e75d7df6 , 0x00003ffd // log(1/frcpa(1+145/2^-8))
+//
+data8 0xe786068c79937a7d , 0x00003ffd // log(1/frcpa(1+146/2^-8))
+data8 0xe91903adad100911 , 0x00003ffd // log(1/frcpa(1+147/2^-8))
+data8 0xea481236f7d35bb0 , 0x00003ffd // log(1/frcpa(1+148/2^-8))
+data8 0xeb77d48c692e6b14 , 0x00003ffd // log(1/frcpa(1+149/2^-8))
+data8 0xeca84b83d7297b87 , 0x00003ffd // log(1/frcpa(1+150/2^-8))
+//
+data8 0xedd977f4962aa158 , 0x00003ffd // log(1/frcpa(1+151/2^-8))
+data8 0xef7179a22f257754 , 0x00003ffd // log(1/frcpa(1+152/2^-8))
+data8 0xf0a450d139366ca7 , 0x00003ffd // log(1/frcpa(1+153/2^-8))
+data8 0xf1d7e0524ff9ffdb , 0x00003ffd // log(1/frcpa(1+154/2^-8))
+data8 0xf30c29036a8b6cae , 0x00003ffd // log(1/frcpa(1+155/2^-8))
+//
+data8 0xf4412bc411ea8d92 , 0x00003ffd // log(1/frcpa(1+156/2^-8))
+data8 0xf576e97564c8619d , 0x00003ffd // log(1/frcpa(1+157/2^-8))
+data8 0xf6ad62fa1b5f172f , 0x00003ffd // log(1/frcpa(1+158/2^-8))
+data8 0xf7e499368b55c542 , 0x00003ffd // log(1/frcpa(1+159/2^-8))
+data8 0xf91c8d10abaffe22 , 0x00003ffd // log(1/frcpa(1+160/2^-8))
+//
+data8 0xfa553f7018c966f3 , 0x00003ffd // log(1/frcpa(1+161/2^-8))
+data8 0xfb8eb13e185d802c , 0x00003ffd // log(1/frcpa(1+162/2^-8))
+data8 0xfcc8e3659d9bcbed , 0x00003ffd // log(1/frcpa(1+163/2^-8))
+data8 0xfe03d6d34d487fd2 , 0x00003ffd // log(1/frcpa(1+164/2^-8))
+data8 0xff3f8c7581e9f0ae , 0x00003ffd // log(1/frcpa(1+165/2^-8))
+//
+data8 0x803e029e280173ae , 0x00003ffe // log(1/frcpa(1+166/2^-8))
+data8 0x80dca10cc52d0757 , 0x00003ffe // log(1/frcpa(1+167/2^-8))
+data8 0x817ba200632755a1 , 0x00003ffe // log(1/frcpa(1+168/2^-8))
+data8 0x821b05f3b01d6774 , 0x00003ffe // log(1/frcpa(1+169/2^-8))
+data8 0x82bacd623ff19d06 , 0x00003ffe // log(1/frcpa(1+170/2^-8))
+//
+data8 0x835af8c88e7a8f47 , 0x00003ffe // log(1/frcpa(1+171/2^-8))
+data8 0x83c5f8299e2b4091 , 0x00003ffe // log(1/frcpa(1+172/2^-8))
+data8 0x8466cb43f3d87300 , 0x00003ffe // log(1/frcpa(1+173/2^-8))
+data8 0x850803a67c80ca4b , 0x00003ffe // log(1/frcpa(1+174/2^-8))
+data8 0x85a9a1d11a23b461 , 0x00003ffe // log(1/frcpa(1+175/2^-8))
+//
+data8 0x864ba644a18e6e05 , 0x00003ffe // log(1/frcpa(1+176/2^-8))
+data8 0x86ee1182dcc432f7 , 0x00003ffe // log(1/frcpa(1+177/2^-8))
+data8 0x875a925d7e48c316 , 0x00003ffe // log(1/frcpa(1+178/2^-8))
+data8 0x87fdaa109d23aef7 , 0x00003ffe // log(1/frcpa(1+179/2^-8))
+data8 0x88a129ed4becfaf2 , 0x00003ffe // log(1/frcpa(1+180/2^-8))
+//
+data8 0x89451278ecd7f9cf , 0x00003ffe // log(1/frcpa(1+181/2^-8))
+data8 0x89b29295f8432617 , 0x00003ffe // log(1/frcpa(1+182/2^-8))
+data8 0x8a572ac5a5496882 , 0x00003ffe // log(1/frcpa(1+183/2^-8))
+data8 0x8afc2d0ce3b2dadf , 0x00003ffe // log(1/frcpa(1+184/2^-8))
+data8 0x8b6a69c608cfd3af , 0x00003ffe // log(1/frcpa(1+185/2^-8))
+//
+data8 0x8c101e106e899a83 , 0x00003ffe // log(1/frcpa(1+186/2^-8))
+data8 0x8cb63de258f9d626 , 0x00003ffe // log(1/frcpa(1+187/2^-8))
+data8 0x8d2539c5bd19e2b1 , 0x00003ffe // log(1/frcpa(1+188/2^-8))
+data8 0x8dcc0e064b29e6f1 , 0x00003ffe // log(1/frcpa(1+189/2^-8))
+data8 0x8e734f45d88357ae , 0x00003ffe // log(1/frcpa(1+190/2^-8))
+//
+data8 0x8ee30cef034a20db , 0x00003ffe // log(1/frcpa(1+191/2^-8))
+data8 0x8f8b0515686d1d06 , 0x00003ffe // log(1/frcpa(1+192/2^-8))
+data8 0x90336bba039bf32f , 0x00003ffe // log(1/frcpa(1+193/2^-8))
+data8 0x90a3edd23d1c9d58 , 0x00003ffe // log(1/frcpa(1+194/2^-8))
+data8 0x914d0de2f5d61b32 , 0x00003ffe // log(1/frcpa(1+195/2^-8))
+//
+data8 0x91be0c20d28173b5 , 0x00003ffe // log(1/frcpa(1+196/2^-8))
+data8 0x9267e737c06cd34a , 0x00003ffe // log(1/frcpa(1+197/2^-8))
+data8 0x92d962ae6abb1237 , 0x00003ffe // log(1/frcpa(1+198/2^-8))
+data8 0x9383fa6afbe2074c , 0x00003ffe // log(1/frcpa(1+199/2^-8))
+data8 0x942f0421651c1c4e , 0x00003ffe // log(1/frcpa(1+200/2^-8))
+//
+data8 0x94a14a3845bb985e , 0x00003ffe // log(1/frcpa(1+201/2^-8))
+data8 0x954d133857f861e7 , 0x00003ffe // log(1/frcpa(1+202/2^-8))
+data8 0x95bfd96468e604c4 , 0x00003ffe // log(1/frcpa(1+203/2^-8))
+data8 0x9632d31cafafa858 , 0x00003ffe // log(1/frcpa(1+204/2^-8))
+data8 0x96dfaabd86fa1647 , 0x00003ffe // log(1/frcpa(1+205/2^-8))
+//
+data8 0x9753261fcbb2a594 , 0x00003ffe // log(1/frcpa(1+206/2^-8))
+data8 0x9800c11b426b996d , 0x00003ffe // log(1/frcpa(1+207/2^-8))
+data8 0x9874bf4d45ae663c , 0x00003ffe // log(1/frcpa(1+208/2^-8))
+data8 0x99231f5ee9a74f79 , 0x00003ffe // log(1/frcpa(1+209/2^-8))
+data8 0x9997a18a56bcad28 , 0x00003ffe // log(1/frcpa(1+210/2^-8))
+//
+data8 0x9a46c873a3267e79 , 0x00003ffe // log(1/frcpa(1+211/2^-8))
+data8 0x9abbcfc621eb6cb6 , 0x00003ffe // log(1/frcpa(1+212/2^-8))
+data8 0x9b310cb0d354c990 , 0x00003ffe // log(1/frcpa(1+213/2^-8))
+data8 0x9be14cf9e1b3515c , 0x00003ffe // log(1/frcpa(1+214/2^-8))
+data8 0x9c5710b8cbb73a43 , 0x00003ffe // log(1/frcpa(1+215/2^-8))
+//
+data8 0x9ccd0abd301f399c , 0x00003ffe // log(1/frcpa(1+216/2^-8))
+data8 0x9d7e67f3bdce8888 , 0x00003ffe // log(1/frcpa(1+217/2^-8))
+data8 0x9df4ea81a99daa01 , 0x00003ffe // log(1/frcpa(1+218/2^-8))
+data8 0x9e6ba405a54514ba , 0x00003ffe // log(1/frcpa(1+219/2^-8))
+data8 0x9f1e21c8c7bb62b3 , 0x00003ffe // log(1/frcpa(1+220/2^-8))
+//
+data8 0x9f956593f6b6355c , 0x00003ffe // log(1/frcpa(1+221/2^-8))
+data8 0xa00ce1092e5498c3 , 0x00003ffe // log(1/frcpa(1+222/2^-8))
+data8 0xa0c08309c4b912c1 , 0x00003ffe // log(1/frcpa(1+223/2^-8))
+data8 0xa1388a8c6faa2afa , 0x00003ffe // log(1/frcpa(1+224/2^-8))
+data8 0xa1b0ca7095b5f985 , 0x00003ffe // log(1/frcpa(1+225/2^-8))
+//
+data8 0xa22942eb47534a00 , 0x00003ffe // log(1/frcpa(1+226/2^-8))
+data8 0xa2de62326449d0a3 , 0x00003ffe // log(1/frcpa(1+227/2^-8))
+data8 0xa357690f88bfe345 , 0x00003ffe // log(1/frcpa(1+228/2^-8))
+data8 0xa3d0a93f45169a4b , 0x00003ffe // log(1/frcpa(1+229/2^-8))
+data8 0xa44a22f7ffe65f30 , 0x00003ffe // log(1/frcpa(1+230/2^-8))
+//
+data8 0xa500c5e5b4c1aa36 , 0x00003ffe // log(1/frcpa(1+231/2^-8))
+data8 0xa57ad064eb2ebbc2 , 0x00003ffe // log(1/frcpa(1+232/2^-8))
+data8 0xa5f5152dedf4384e , 0x00003ffe // log(1/frcpa(1+233/2^-8))
+data8 0xa66f9478856233ec , 0x00003ffe // log(1/frcpa(1+234/2^-8))
+data8 0xa6ea4e7cca02c32e , 0x00003ffe // log(1/frcpa(1+235/2^-8))
+//
+data8 0xa765437325341ccf , 0x00003ffe // log(1/frcpa(1+236/2^-8))
+data8 0xa81e21e6c75b4020 , 0x00003ffe // log(1/frcpa(1+237/2^-8))
+data8 0xa899ab333fe2b9ca , 0x00003ffe // log(1/frcpa(1+238/2^-8))
+data8 0xa9157039c51ebe71 , 0x00003ffe // log(1/frcpa(1+239/2^-8))
+data8 0xa991713433c2b999 , 0x00003ffe // log(1/frcpa(1+240/2^-8))
+//
+data8 0xaa0dae5cbcc048b3 , 0x00003ffe // log(1/frcpa(1+241/2^-8))
+data8 0xaa8a27ede5eb13ad , 0x00003ffe // log(1/frcpa(1+242/2^-8))
+data8 0xab06de228a9e3499 , 0x00003ffe // log(1/frcpa(1+243/2^-8))
+data8 0xab83d135dc633301 , 0x00003ffe // log(1/frcpa(1+244/2^-8))
+data8 0xac3fb076adc7fe7a , 0x00003ffe // log(1/frcpa(1+245/2^-8))
+//
+data8 0xacbd3cbbe47988f1 , 0x00003ffe // log(1/frcpa(1+246/2^-8))
+data8 0xad3b06b1a5dc57c3 , 0x00003ffe // log(1/frcpa(1+247/2^-8))
+data8 0xadb90e94af887717 , 0x00003ffe // log(1/frcpa(1+248/2^-8))
+data8 0xae3754a218f7c816 , 0x00003ffe // log(1/frcpa(1+249/2^-8))
+data8 0xaeb5d9175437afa2 , 0x00003ffe // log(1/frcpa(1+250/2^-8))
+//
+data8 0xaf349c322e9c7cee , 0x00003ffe // log(1/frcpa(1+251/2^-8))
+data8 0xafb39e30d1768d1c , 0x00003ffe // log(1/frcpa(1+252/2^-8))
+data8 0xb032df51c2c93116 , 0x00003ffe // log(1/frcpa(1+253/2^-8))
+data8 0xb0b25fd3e6035ad9 , 0x00003ffe // log(1/frcpa(1+254/2^-8))
+data8 0xb1321ff67cba178c , 0x00003ffe // log(1/frcpa(1+255/2^-8))
+LOCAL_OBJECT_END(log_table_3)
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(acosh)
+// acosh(x): argument and result in f8; classify x and dispatch to a path
+{ .mfi
+ getf.exp acosh_GR_f8 = f8 // Sign and biased exponent of x
+ fclass.m p6,p0 = f8, 0xc3 // Test for x = NaN
+ mov log_GR_comp2 = 0x1003e // Biased exponent of 2^63 (0xffff + 63)
+}
+{ .mfi
+ addl NR_table_address = @ltoff(log_table_1), gp // Linkage-table entry for log_table_1
+ fms.s1 log_y = f8, f8, f1 // y = x^2-1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.sig acosh_GR_f8_sig = f8 // 64-bit significand of x
+ fclass.m p11,p0 = f8, 0x21 // Test for x=+inf
+ mov log_GR_exp_17_ones = 0x1ffff // Mask for the 17-bit exponent field
+}
+{ .mfi
+ ld8 NR_table_address = [NR_table_address] // Load address of log_table_1
+ fms.s1 log_w = f8,f1,f1 // w = x - 1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.lt.s1 p7,p8 = f8, f1 // Test for x<1.0
+ addl log_GR_comp = 0x10020C,r0 // Upper 21 bits of signif of 1.0005
+}
+{ .mfb
+ mov log_GR_exp_16_ones = 0xffff //BIAS
+(p6) fma.d.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
+(p6) br.ret.spnt b0 // Exit for x=nan
+}
+;;
+
+{ .mfb
+ // Second table address: 0x40 bytes past log_table_1
+ adds log_table_address2 = 0x40, NR_table_address
+ fcmp.eq.s1 p10,p0 = f8, f1 // Test for x=+1.0
+(p11) br.ret.spnt b0 // Exit for x=+inf
+}
+;;
+
+{ .mfi
+ ldfpd NR1,NR2 = [log_table_address2],16 // Newton-Raphson constants (used as 0.5 and 3 below)
+ frsqrta.s1 log_y_rs,p0 = log_y // z=1/sqrt(y)
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s1 log_arg = f8,f1,f8 // log_arg = 2*x, the log argument on path 4
+(p7) br.cond.spnt ACOSH_LESS_ONE // Branch if path 7, x < 1.0
+}
+;;
+
+{ .mfi
+ ldfe log_C4 = [log_table_address2],16
+(p8) fcmp.eq.s0 p6,p0 = f8, f0 // Dummy op sets denorm flag if unorm>=1.0
+ nop.i 0
+}
+{ .mfb
+(p8) cmp.le.unc p13,p0 = log_GR_comp2,acosh_GR_f8 // p13 if exponent >= that of 2^63
+ nop.f 0
+(p13) br.cond.spnt LOG_COMMON1 // Branch if path 4, x >= 2^63
+}
+;;
+
+{ .mfi
+ ldfe log_C3 = [log_table_address2],16
+(p10) fmerge.s f8 = f0, f0 // Return 0 if x=1.0
+ shr.u acosh_GR_f8_sig = acosh_GR_f8_sig,43 // Keep upper 21 bits of significand
+}
+{ .mib
+ cmp.eq p14,p0 = log_GR_exp_16_ones,acosh_GR_f8 // p14 if exponent field equals the bias
+ nop.i 0
+(p10) br.ret.spnt b0 // Exit for x=1.0
+}
+;;
+
+{ .mfi
+ ldfe log_C2 = [log_table_address2],16
+ frsqrta.s1 acosh_w_rs,p0 = log_w // t=1/sqrt(w)
+ nop.i 0
+}
+{ .mfb
+(p14) cmp.lt.unc p15,p0 = acosh_GR_f8_sig,log_GR_comp // p15 if upper signif bits < those of 1.0005
+ nop.f 0
+(p15) br.cond.spnt ACOSH_NEAR_ONE // Branch if path 2, 1.0 < x < 1.0005
+}
+;;
+
+// Here is main path, 1.0005 <= x < 2^63
+/////////////// The first iteration //////////////////////////////////
+{ .mfi
+ ldfpd acosh_comp,log_P5 = [NR_table_address],16
+ fma.s1 log_y_rs_iter = log_y_rs,log_y,f0 // y*z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfpd log_P4,log_P3 = [NR_table_address],16
+ fnma.s1 log_y_rs_iter = log_y_rs_iter,log_y_rs,NR2 // 3-(y*z)*z
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs,NR1,f0 // 0.5*z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfpd log_P2,log_P1 = [NR_table_address],16
+ //(0.5*z)*(3-(y*z)*z)
+ fma.s1 log_y_rs_iter = log_y_rs_iter1,log_y_rs_iter,f0
+ nop.i 0
+}
+;;
+
+/////////////////////////// The second iteration /////////////////////////////
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs = log_y_rs_iter,log_y,f0 // y*z with the refined z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 log_y_rs = log_y_rs,log_y_rs_iter,NR2 // 3-(y*z)*z
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs_iter,NR1,f0 // 0.5*z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //(0.5*z)*(3-(y*z)*z)
+ fma.s1 log_y_rs_iter = log_y_rs_iter1,log_y_rs,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Same product again, kept separately for the early log argument
+ fma.s1 log_arg_early = log_y_rs_iter1,log_y_rs,f0
+ nop.i 0
+}
+;;
+
+//////////////////////////////////////// The third iteration /////////////////
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs = log_y_rs_iter,log_y,f0 // y*z
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs_iter,NR1,f0 // 0.5*z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_arg_early = log_arg_early,log_y,f8 // x + y*z ~ x + sqrt(x^2-1)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 log_y_rs = log_y_rs,log_y_rs_iter,NR2 // 3-(y*z)*z
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs_iter1,log_y,f0 // (0.5*z)*y
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ frcpa.s1 log_C,p0 = f1,log_arg_early // C = frcpa(early log argument)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp log_GR_signexp_f8 = log_arg_early
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.sig log_GR_significand_f8 = log_arg_early
+ fma.s1 log_arg = log_y_rs_iter1,log_y_rs,f8 // x + (0.5*z*y)*(3-(y*z)*z)
+ adds log_table_address3 = 0x70, NR_table_address // T table base: 0x70 past current pointer
+}
+;;
+
+///////////////////////////////// The end NR iterations /////////////////////
+{ .mfi
+ ldfe log2 = [NR_table_address],16
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mmi
+ // Mask off the sign bit, keep the 17-bit biased exponent
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
+;;
+ //BIAS subtraction
+ sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.sig log_int_Nfloat = log_GR_true_exp_f8
+ fms.s1 log_r = log_C,log_arg,f1 // r = C * arg - 1, arg = x + sqrt(x^2-1)
+ extr.u log_GR_index = log_GR_significand_f8,55,8 // Index = significand bits 62..55
+}
+;;
+
+{ .mmi
+ // T entry address = table base + index*16
+ shladd log_table_address3 = log_GR_index,4,log_table_address3
+;;
+ ldfe log_T = [log_table_address3]
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rsq = log_r, log_r, f0 //r^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p4 = log_P5, log_r, log_P4 //P5*r + P4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //convert N to the floating-point format log_Nfloat
+ fcvt.xf log_Nfloat = log_int_Nfloat
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rcube = log_rsq, log_r, f0 //r^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //(P5*r + P4)*r^2 + P3*r + P2
+ fma.s1 log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
+ fma.s1 log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
+ fadd.d.s0 f8 = log_T_plus_Nlog2, log_r2P_r
+ br.ret.sptk b0 // Exit main path, path 3: 1.0005 <= x < 2^63
+}
+;;
+
+// Here if path 2, 1.0 < x < 1.0005
+ACOSH_NEAR_ONE:
+// The first NR iteration
+{ .mfi
+ ldfe log_C1 = [log_table_address2],16
+ fma.s1 acosh_w_iter1 = acosh_w_rs,log_w,f0 //t*w
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_1 = f8,log_C4,log_C3 //x*C4 + C3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe log_C0 = [log_table_address2],16
+ fma.s1 acosh_w_iter2 = acosh_w_rs,NR1,f0 //t*0.5
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 acosh_w_iter1 = acosh_w_iter1,acosh_w_rs,NR2 //3-t*t*w
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //(3-t*t*w)*t*0.5
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C2 //(x*C4 + C3)*(x-1) + C2
+ nop.i 0
+}
+;;
+
+// The second NR iteration
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ //((x*C4 + C3)*(x-1) + C2)*(x-1) + C1
+ fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2 //3-t*t*w
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,NR1,f0 //t*0.5
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 //(3-t*t*w)*t*0.5
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ //(((x*C4 + C3)*(x-1) + C2)*(x-1) + C1)*(x-1) + C0
+ fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C0
+ nop.i 0
+}
+;;
+
+//The third NR iteration
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2 //3-t*t*w
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,NR1,f0 //t*0.5
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 //(3-t*t*w)*t*0.5
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_sqrt = acosh_w_iter2,log_w,f0 // t*w ~ sqrt(x-1)
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = acosh_w_1,acosh_w_sqrt,f0 // result = polynomial * sqrt(x-1)
+ br.ret.sptk b0 // Exit path 2, 1.0 < x < 1.0005
+}
+;;
+
+// Here if path 4, x >= 2^63
+LOG_COMMON1:
+{ .mfi
+ ldfpd acosh_comp,log_P5 = [NR_table_address],16
+ frcpa.s1 log_C,p0 = f1,log_arg // C = frcpa(2*x); log_arg was set to 2*x at entry
+ nop.i 0
+}
+;;
+
+{ .mmi
+ getf.exp log_GR_signexp_f8 = log_arg
+ ldfpd log_P4,log_P3 = [NR_table_address],16
+ nop.i 0
+}
+;;
+
+{ .mmi
+ getf.sig log_GR_significand_f8 = log_arg
+ ldfpd log_P2,log_P1 = [NR_table_address],16
+ nop.i 0
+}
+;;
+
+{ .mfi
+ adds log_table_address3 = 0x70, NR_table_address // T table base: 0x70 past current pointer
+ nop.f 0
+ // Mask off the sign bit, keep the 17-bit biased exponent
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
+}
+;;
+
+{ .mmf
+ ldfe log2 = [NR_table_address],16
+ //BIAS subtraction
+ sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
+ fms.s1 log_r = log_C,log_arg,f1 // r = C * (2*x) - 1
+}
+;;
+
+{ .mfi
+ setf.sig log_int_Nfloat = log_GR_true_exp_f8
+ nop.f 0
+ extr.u log_GR_index = log_GR_significand_f8,55,8 // Index = significand bits 62..55
+}
+;;
+
+{ .mmi
+ // T entry address = table base + index*16
+ shladd log_table_address3 = log_GR_index,4,log_table_address3
+;;
+ ldfe log_T = [log_table_address3]
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rsq = log_r, log_r, f0 //r^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p4 = log_P5, log_r, log_P4 //P5*r + P4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rcube = log_rsq, log_r, f0 //r^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p10 = log_rsq, log_P1, log_r //P1*r^2 + r
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //convert N to the floating-point format log_Nfloat
+ fcvt.xf log_Nfloat = log_int_Nfloat
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ //(P5*r + P4)*r^2 + P3*r + P2
+ fma.s1 log_rp_p2 = log_rp_p4, log_rsq, log_rp_p32
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ //((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
+ fma.s1 log_r2P_r = log_rp_p2, log_rcube, log_rp_p10
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // N*log2 + T + ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
+ fadd.d.s0 f8 = log_T_plus_Nlog2, log_r2P_r
+ br.ret.sptk b0 // Exit path 4, x >= 2^63
+}
+;;
+
+// Here if path 7, x < 1.0
+ACOSH_LESS_ONE:
+{ .mfi
+ alloc r32 = ar.pfs,1,3,4,0 // Register frame: 1 input, 3 locals, 4 outputs
+ fmerge.s f10 = f8,f8 // Keep a copy of x in f10; __libm_error_region stores it as Parameter 1
+ nop.i 0
+}
+;;
+
+{ .mfb
+ mov acosh_GR_tag = 136 // Error tag for the x < 1.0 case
+ frcpa.s0 f8,p0 = f0,f0 // f8 = result of 0/0 in status field 0 (NOTE(review): expected to be a NaN with invalid raised - confirm)
+ br.cond.sptk __libm_error_region
+}
+;;
+
+GLOBAL_LIBM_END(acosh)
+libm_alias_double_other (acosh, acosh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// Spills f10, f1 and f8 to a new 64-byte frame, calls __libm_error_support, reloads f8
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (entry sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfd [GR_Parameter_Y] = f1,16 // STORE Parameter 2 (f1) on stack, then advance pointer by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfd [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at the Parameter 2 slot
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute the Parameter 3 (result) address: sp + 48
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acoshf.S b/sysdeps/ia64/fpu/e_acoshf.S
new file mode 100644
index 0000000000..240c5cba98
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acoshf.S
@@ -0,0 +1,1030 @@
+.file "acoshf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// ==============================================================
+// History
+// ==============================================================
+// 03/28/01 Initial version
+// 04/19/01 Improved speed of the paths #1,2,3,4,5
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 05/14/03 Improved performance, set denormal flag for unorms >= 1.0
+//
+// API
+// ==============================================================
+// float acoshf(float)
+//
+// Overview of operation
+// ==============================================================
+//
+// There are 7 paths:
+// 1. x = 1.0
+// Return acoshf(x) = 0.0
+// 2. 1.0 < x < 1.000499725341796875(0x3FF0020C00000000)
+// Return acoshf(x) = sqrt(x-1) * Pol4(x),
+// where Pol4(x) = (x*C2 + C1)*(x-1) + C0
+//
+// 3. 1.000499725341796875(0x3FF0020C00000000) <= x < 2^51
+// Return acoshf(x) = log(x + sqrt(x^2 -1.0))
+// To compute x + sqrt(x^2 -1.0) modified Newton Raphson method is used
+// (2 iterations)
+// Algorithm description for log function see below.
+//
+// 4. 2^51 <= x < +INF
+// Return acoshf(x) = log(2*x)
+// Algorithm description for log function see below.
+//
+// 5. x = +INF
+// Return acoshf(x) = +INF
+//
+// 6. x = [S,Q]NaN
+// Return acoshf(x) = QNaN
+//
+// 7. x < 1.0
+// This is a domain error. Error handler with tag = 137 is called
+//
+//==============================================================
+// Algorithm Description for log(x) function
+// Below we are using the fact that inequality x - 1.0 > 2^(-6) is always
+// true for this acosh implementation
+//
+// Consider x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(frcpa(x) x/frcpa(x))
+// = log(1/frcpa(x)) + log(frcpa(x) x)
+// = -log(frcpa(x)) + log(frcpa(x) x)
+//
+// frcpa(x) = 2^-N frcpa(1.f1 f2 ... f63)
+//
+// -log(frcpa(x)) = -log(C)
+// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = +Nlog2 + log(1/frcpa(1.f1 f2 ... f63))
+//
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+//
+// Log(x) = +Nlog2 + log(1./frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 - log(frcpa(1.f1 f2 ... f63)) + log(frcpa(x) x)
+// Log(x) = +Nlog2 + T + log(frcpa(x) x)
+//
+// Log(x) = +Nlog2 + T + log(C x)
+//
+// Cx = 1 + r
+//
+// Log(x) = +Nlog2 + T + log(1+r)
+// Log(x) = +Nlog2 + T + Series( r - r^2/2 + r^3/3 - r^4/4 ....)
+//
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
+//
+// Implementation
+//==============================================================
+// C = frcpa(x)
+// r = C * x - 1
+//
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
+//
+// x = f * 2^n where f is 1.f_1f_2f_3....f_63
+// Nfloat = float(n) where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 8
+// get the dxt table entry at index + offset = T
+//
+// result = (T + Nfloat * log(2)) + rseries
+//
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+// y_k = frcpa(x_k)
+// log(1/y_k) in quad and round to double
+//
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f9 -> f15, f32 -> f62
+//
+// General registers used:
+// r14 -> r27, r32 -> r39
+//
+// Predicate registers used:
+// p6 -> p15
+//
+// p6 to filter out case when x = [Q,S]NaN
+// p7,p8 to filter out case when x < 1.0
+//
+// p10 to select path #1
+// p11 to filter out case when x = +INF
+// p12 used in the frcpa
+// p13 to select path #4
+// p14,p15 to select path #2
+
+// Assembly macros
+//==============================================================
+log_GR_exp_17_ones = r14
+log_GR_signexp_f8 = r15
+log_table_address2 = r16
+log_GR_exp_16_ones = r17
+log_GR_exp_f8 = r18
+log_GR_true_exp_f8 = r19
+log_GR_significand_f8 = r20
+log_GR_index = r21
+log_GR_comp2 = r22
+acosh_GR_f8 = r23
+log_GR_comp = r24
+acosh_GR_f8_sig = r25
+log_table_address3 = r26
+NR_table_address = r27
+// Aliases in r33-r39. NOTE(review): presumably used only on the error-handling path - confirm
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_SAVE_PFS = r35
+
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+acosh_GR_tag = r39
+
+//============== Floating-point register aliases ===============
+log_y = f9
+NR1 = f10
+NR2 = f11
+log_y_rs = f12
+log_y_rs_iter = f13
+log_y_rs_iter1 = f14
+log_NORM_f8 = f15
+log_w = f32
+acosh_comp = f34
+acosh_comp2 = f33
+log_P3 = f35
+log_P2 = f36
+log_P1 = f37
+log2 = f38
+log_C0 = f39
+log_C1 = f40
+log_C2 = f41
+acosh_w_rs = f42
+log_C = f43
+log_arg = f44
+acosh_w_iter1 = f45
+acosh_w_iter2 = f46
+log_int_Nfloat = f47
+log_r = f48
+log_rsq = f49
+log_rp_p4 = f50
+log_rp_p32 = f51
+log_rcube = f52
+log_rp_p10 = f53
+log_rp_p2 = f54
+log_Nfloat = f55
+log_T = f56
+log_r2P_r = f57
+log_T_plus_Nlog2 = f58
+acosh_w_sqrt = f59
+acosh_w_1 = f60
+log_arg_early = f61
+log_y_rs_iter2 = f62
+
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(log_table_1)
+data8 0xbfd0001008f39d59 // p3 ~ -1/4, coefficient of r^4 in rseries
+data8 0x3fd5556073e0c45a // p2 ~ +1/3, coefficient of r^3 in rseries
+data8 0xbfdffffffffaea15 // p1 ~ -1/2, coefficient of r^2 in rseries
+data8 0x3FE62E42FEFA39EF // log2 = ln(2) ~ 0.693147 as an IEEE double
+LOCAL_OBJECT_END(log_table_1)
+
+LOCAL_OBJECT_START(log_table_2)
+// Two IEEE doubles, then three 16-byte entries (64-bit significand, then sign/exponent word)
+data8 0x3FE0000000000000 // 0.5 (presumably the NR1 Newton-Raphson constant - confirm)
+data8 0x4008000000000000 // 3.0 (presumably the NR2 Newton-Raphson constant - confirm)
+data8 0xD92CBAD213719F11, 0x00003FF9 // C2 3FF9D92CBAD213719F11
+data8 0x93D38EBF2EC9B073, 0x0000BFFC // C1 BFFC93D38EBF2EC9B073 (negative)
+data8 0xB504F333F9DA0E32, 0x00003FFF // C0 3FFFB504F333F9DA0E32 (~ sqrt(2))
+LOCAL_OBJECT_END(log_table_2)
+
+LOCAL_OBJECT_START(log_table_3)
+data8 0x3F60040155D5889E //log(1/frcpa(1+ 0/256)
+data8 0x3F78121214586B54 //log(1/frcpa(1+ 1/256)
+data8 0x3F841929F96832F0 //log(1/frcpa(1+ 2/256)
+data8 0x3F8C317384C75F06 //log(1/frcpa(1+ 3/256)
+data8 0x3F91A6B91AC73386 //log(1/frcpa(1+ 4/256)
+data8 0x3F95BA9A5D9AC039 //log(1/frcpa(1+ 5/256)
+data8 0x3F99D2A8074325F4 //log(1/frcpa(1+ 6/256)
+data8 0x3F9D6B2725979802 //log(1/frcpa(1+ 7/256)
+data8 0x3FA0C58FA19DFAAA //log(1/frcpa(1+ 8/256)
+data8 0x3FA2954C78CBCE1B //log(1/frcpa(1+ 9/256)
+data8 0x3FA4A94D2DA96C56 //log(1/frcpa(1+ 10/256)
+data8 0x3FA67C94F2D4BB58 //log(1/frcpa(1+ 11/256)
+data8 0x3FA85188B630F068 //log(1/frcpa(1+ 12/256)
+data8 0x3FAA6B8ABE73AF4C //log(1/frcpa(1+ 13/256)
+data8 0x3FAC441E06F72A9E //log(1/frcpa(1+ 14/256)
+data8 0x3FAE1E6713606D07 //log(1/frcpa(1+ 15/256)
+data8 0x3FAFFA6911AB9301 //log(1/frcpa(1+ 16/256)
+data8 0x3FB0EC139C5DA601 //log(1/frcpa(1+ 17/256)
+data8 0x3FB1DBD2643D190B //log(1/frcpa(1+ 18/256)
+data8 0x3FB2CC7284FE5F1C //log(1/frcpa(1+ 19/256)
+data8 0x3FB3BDF5A7D1EE64 //log(1/frcpa(1+ 20/256)
+data8 0x3FB4B05D7AA012E0 //log(1/frcpa(1+ 21/256)
+data8 0x3FB580DB7CEB5702 //log(1/frcpa(1+ 22/256)
+data8 0x3FB674F089365A7A //log(1/frcpa(1+ 23/256)
+data8 0x3FB769EF2C6B568D //log(1/frcpa(1+ 24/256)
+data8 0x3FB85FD927506A48 //log(1/frcpa(1+ 25/256)
+data8 0x3FB9335E5D594989 //log(1/frcpa(1+ 26/256)
+data8 0x3FBA2B0220C8E5F5 //log(1/frcpa(1+ 27/256)
+data8 0x3FBB0004AC1A86AC //log(1/frcpa(1+ 28/256)
+data8 0x3FBBF968769FCA11 //log(1/frcpa(1+ 29/256)
+data8 0x3FBCCFEDBFEE13A8 //log(1/frcpa(1+ 30/256)
+data8 0x3FBDA727638446A2 //log(1/frcpa(1+ 31/256)
+data8 0x3FBEA3257FE10F7A //log(1/frcpa(1+ 32/256)
+data8 0x3FBF7BE9FEDBFDE6 //log(1/frcpa(1+ 33/256)
+data8 0x3FC02AB352FF25F4 //log(1/frcpa(1+ 34/256)
+data8 0x3FC097CE579D204D //log(1/frcpa(1+ 35/256)
+data8 0x3FC1178E8227E47C //log(1/frcpa(1+ 36/256)
+data8 0x3FC185747DBECF34 //log(1/frcpa(1+ 37/256)
+data8 0x3FC1F3B925F25D41 //log(1/frcpa(1+ 38/256)
+data8 0x3FC2625D1E6DDF57 //log(1/frcpa(1+ 39/256)
+data8 0x3FC2D1610C86813A //log(1/frcpa(1+ 40/256)
+data8 0x3FC340C59741142E //log(1/frcpa(1+ 41/256)
+data8 0x3FC3B08B6757F2A9 //log(1/frcpa(1+ 42/256)
+data8 0x3FC40DFB08378003 //log(1/frcpa(1+ 43/256)
+data8 0x3FC47E74E8CA5F7C //log(1/frcpa(1+ 44/256)
+data8 0x3FC4EF51F6466DE4 //log(1/frcpa(1+ 45/256)
+data8 0x3FC56092E02BA516 //log(1/frcpa(1+ 46/256)
+data8 0x3FC5D23857CD74D5 //log(1/frcpa(1+ 47/256)
+data8 0x3FC6313A37335D76 //log(1/frcpa(1+ 48/256)
+data8 0x3FC6A399DABBD383 //log(1/frcpa(1+ 49/256)
+data8 0x3FC70337DD3CE41B //log(1/frcpa(1+ 50/256)
+data8 0x3FC77654128F6127 //log(1/frcpa(1+ 51/256)
+data8 0x3FC7E9D82A0B022D //log(1/frcpa(1+ 52/256)
+data8 0x3FC84A6B759F512F //log(1/frcpa(1+ 53/256)
+data8 0x3FC8AB47D5F5A310 //log(1/frcpa(1+ 54/256)
+data8 0x3FC91FE49096581B //log(1/frcpa(1+ 55/256)
+data8 0x3FC981634011AA75 //log(1/frcpa(1+ 56/256)
+data8 0x3FC9F6C407089664 //log(1/frcpa(1+ 57/256)
+data8 0x3FCA58E729348F43 //log(1/frcpa(1+ 58/256)
+data8 0x3FCABB55C31693AD //log(1/frcpa(1+ 59/256)
+data8 0x3FCB1E104919EFD0 //log(1/frcpa(1+ 60/256)
+data8 0x3FCB94EE93E367CB //log(1/frcpa(1+ 61/256)
+data8 0x3FCBF851C067555F //log(1/frcpa(1+ 62/256)
+data8 0x3FCC5C0254BF23A6 //log(1/frcpa(1+ 63/256)
+data8 0x3FCCC000C9DB3C52 //log(1/frcpa(1+ 64/256)
+data8 0x3FCD244D99C85674 //log(1/frcpa(1+ 65/256)
+data8 0x3FCD88E93FB2F450 //log(1/frcpa(1+ 66/256)
+data8 0x3FCDEDD437EAEF01 //log(1/frcpa(1+ 67/256)
+data8 0x3FCE530EFFE71012 //log(1/frcpa(1+ 68/256)
+data8 0x3FCEB89A1648B971 //log(1/frcpa(1+ 69/256)
+data8 0x3FCF1E75FADF9BDE //log(1/frcpa(1+ 70/256)
+data8 0x3FCF84A32EAD7C35 //log(1/frcpa(1+ 71/256)
+data8 0x3FCFEB2233EA07CD //log(1/frcpa(1+ 72/256)
+data8 0x3FD028F9C7035C1C //log(1/frcpa(1+ 73/256)
+data8 0x3FD05C8BE0D9635A //log(1/frcpa(1+ 74/256)
+data8 0x3FD085EB8F8AE797 //log(1/frcpa(1+ 75/256)
+data8 0x3FD0B9C8E32D1911 //log(1/frcpa(1+ 76/256)
+data8 0x3FD0EDD060B78081 //log(1/frcpa(1+ 77/256)
+data8 0x3FD122024CF0063F //log(1/frcpa(1+ 78/256)
+data8 0x3FD14BE2927AECD4 //log(1/frcpa(1+ 79/256)
+data8 0x3FD180618EF18ADF //log(1/frcpa(1+ 80/256)
+data8 0x3FD1B50BBE2FC63B //log(1/frcpa(1+ 81/256)
+data8 0x3FD1DF4CC7CF242D //log(1/frcpa(1+ 82/256)
+data8 0x3FD214456D0EB8D4 //log(1/frcpa(1+ 83/256)
+data8 0x3FD23EC5991EBA49 //log(1/frcpa(1+ 84/256)
+data8 0x3FD2740D9F870AFB //log(1/frcpa(1+ 85/256)
+data8 0x3FD29ECDABCDFA04 //log(1/frcpa(1+ 86/256)
+data8 0x3FD2D46602ADCCEE //log(1/frcpa(1+ 87/256)
+data8 0x3FD2FF66B04EA9D4 //log(1/frcpa(1+ 88/256)
+data8 0x3FD335504B355A37 //log(1/frcpa(1+ 89/256)
+data8 0x3FD360925EC44F5D //log(1/frcpa(1+ 90/256)
+data8 0x3FD38BF1C3337E75 //log(1/frcpa(1+ 91/256)
+data8 0x3FD3C25277333184 //log(1/frcpa(1+ 92/256)
+data8 0x3FD3EDF463C1683E //log(1/frcpa(1+ 93/256)
+data8 0x3FD419B423D5E8C7 //log(1/frcpa(1+ 94/256)
+data8 0x3FD44591E0539F49 //log(1/frcpa(1+ 95/256)
+data8 0x3FD47C9175B6F0AD //log(1/frcpa(1+ 96/256)
+data8 0x3FD4A8B341552B09 //log(1/frcpa(1+ 97/256)
+data8 0x3FD4D4F3908901A0 //log(1/frcpa(1+ 98/256)
+data8 0x3FD501528DA1F968 //log(1/frcpa(1+ 99/256)
+data8 0x3FD52DD06347D4F6 //log(1/frcpa(1+ 100/256)
+data8 0x3FD55A6D3C7B8A8A //log(1/frcpa(1+ 101/256)
+data8 0x3FD5925D2B112A59 //log(1/frcpa(1+ 102/256)
+data8 0x3FD5BF406B543DB2 //log(1/frcpa(1+ 103/256)
+data8 0x3FD5EC433D5C35AE //log(1/frcpa(1+ 104/256)
+data8 0x3FD61965CDB02C1F //log(1/frcpa(1+ 105/256)
+data8 0x3FD646A84935B2A2 //log(1/frcpa(1+ 106/256)
+data8 0x3FD6740ADD31DE94 //log(1/frcpa(1+ 107/256)
+data8 0x3FD6A18DB74A58C5 //log(1/frcpa(1+ 108/256)
+data8 0x3FD6CF31058670EC //log(1/frcpa(1+ 109/256)
+data8 0x3FD6F180E852F0BA //log(1/frcpa(1+ 110/256)
+data8 0x3FD71F5D71B894F0 //log(1/frcpa(1+ 111/256)
+data8 0x3FD74D5AEFD66D5C //log(1/frcpa(1+ 112/256)
+data8 0x3FD77B79922BD37E //log(1/frcpa(1+ 113/256)
+data8 0x3FD7A9B9889F19E2 //log(1/frcpa(1+ 114/256)
+data8 0x3FD7D81B037EB6A6 //log(1/frcpa(1+ 115/256)
+data8 0x3FD8069E33827231 //log(1/frcpa(1+ 116/256)
+data8 0x3FD82996D3EF8BCB //log(1/frcpa(1+ 117/256)
+data8 0x3FD85855776DCBFB //log(1/frcpa(1+ 118/256)
+data8 0x3FD8873658327CCF //log(1/frcpa(1+ 119/256)
+data8 0x3FD8AA75973AB8CF //log(1/frcpa(1+ 120/256)
+data8 0x3FD8D992DC8824E5 //log(1/frcpa(1+ 121/256)
+data8 0x3FD908D2EA7D9512 //log(1/frcpa(1+ 122/256)
+data8 0x3FD92C59E79C0E56 //log(1/frcpa(1+ 123/256)
+data8 0x3FD95BD750EE3ED3 //log(1/frcpa(1+ 124/256)
+data8 0x3FD98B7811A3EE5B //log(1/frcpa(1+ 125/256)
+data8 0x3FD9AF47F33D406C //log(1/frcpa(1+ 126/256)
+data8 0x3FD9DF270C1914A8 //log(1/frcpa(1+ 127/256)
+data8 0x3FDA0325ED14FDA4 //log(1/frcpa(1+ 128/256)
+data8 0x3FDA33440224FA79 //log(1/frcpa(1+ 129/256)
+data8 0x3FDA57725E80C383 //log(1/frcpa(1+ 130/256)
+data8 0x3FDA87D0165DD199 //log(1/frcpa(1+ 131/256)
+data8 0x3FDAAC2E6C03F896 //log(1/frcpa(1+ 132/256)
+data8 0x3FDADCCC6FDF6A81 //log(1/frcpa(1+ 133/256)
+data8 0x3FDB015B3EB1E790 //log(1/frcpa(1+ 134/256)
+data8 0x3FDB323A3A635948 //log(1/frcpa(1+ 135/256)
+data8 0x3FDB56FA04462909 //log(1/frcpa(1+ 136/256)
+data8 0x3FDB881AA659BC93 //log(1/frcpa(1+ 137/256)
+data8 0x3FDBAD0BEF3DB165 //log(1/frcpa(1+ 138/256)
+data8 0x3FDBD21297781C2F //log(1/frcpa(1+ 139/256)
+data8 0x3FDC039236F08819 //log(1/frcpa(1+ 140/256)
+data8 0x3FDC28CB1E4D32FD //log(1/frcpa(1+ 141/256)
+data8 0x3FDC4E19B84723C2 //log(1/frcpa(1+ 142/256)
+data8 0x3FDC7FF9C74554C9 //log(1/frcpa(1+ 143/256)
+data8 0x3FDCA57B64E9DB05 //log(1/frcpa(1+ 144/256)
+data8 0x3FDCCB130A5CEBB0 //log(1/frcpa(1+ 145/256)
+data8 0x3FDCF0C0D18F326F //log(1/frcpa(1+ 146/256)
+data8 0x3FDD232075B5A201 //log(1/frcpa(1+ 147/256)
+data8 0x3FDD490246DEFA6B //log(1/frcpa(1+ 148/256)
+data8 0x3FDD6EFA918D25CD //log(1/frcpa(1+ 149/256)
+data8 0x3FDD9509707AE52F //log(1/frcpa(1+ 150/256)
+data8 0x3FDDBB2EFE92C554 //log(1/frcpa(1+ 151/256)
+data8 0x3FDDEE2F3445E4AF //log(1/frcpa(1+ 152/256)
+data8 0x3FDE148A1A2726CE //log(1/frcpa(1+ 153/256)
+data8 0x3FDE3AFC0A49FF40 //log(1/frcpa(1+ 154/256)
+data8 0x3FDE6185206D516E //log(1/frcpa(1+ 155/256)
+data8 0x3FDE882578823D52 //log(1/frcpa(1+ 156/256)
+data8 0x3FDEAEDD2EAC990C //log(1/frcpa(1+ 157/256)
+data8 0x3FDED5AC5F436BE3 //log(1/frcpa(1+ 158/256)
+data8 0x3FDEFC9326D16AB9 //log(1/frcpa(1+ 159/256)
+data8 0x3FDF2391A2157600 //log(1/frcpa(1+ 160/256)
+data8 0x3FDF4AA7EE03192D //log(1/frcpa(1+ 161/256)
+data8 0x3FDF71D627C30BB0 //log(1/frcpa(1+ 162/256)
+data8 0x3FDF991C6CB3B379 //log(1/frcpa(1+ 163/256)
+data8 0x3FDFC07ADA69A910 //log(1/frcpa(1+ 164/256)
+data8 0x3FDFE7F18EB03D3E //log(1/frcpa(1+ 165/256)
+data8 0x3FE007C053C5002E //log(1/frcpa(1+ 166/256)
+data8 0x3FE01B942198A5A1 //log(1/frcpa(1+ 167/256)
+data8 0x3FE02F74400C64EB //log(1/frcpa(1+ 168/256)
+data8 0x3FE04360BE7603AD //log(1/frcpa(1+ 169/256)
+data8 0x3FE05759AC47FE34 //log(1/frcpa(1+ 170/256)
+data8 0x3FE06B5F1911CF52 //log(1/frcpa(1+ 171/256)
+data8 0x3FE078BF0533C568 //log(1/frcpa(1+ 172/256)
+data8 0x3FE08CD9687E7B0E //log(1/frcpa(1+ 173/256)
+data8 0x3FE0A10074CF9019 //log(1/frcpa(1+ 174/256)
+data8 0x3FE0B5343A234477 //log(1/frcpa(1+ 175/256)
+data8 0x3FE0C974C89431CE //log(1/frcpa(1+ 176/256)
+data8 0x3FE0DDC2305B9886 //log(1/frcpa(1+ 177/256)
+data8 0x3FE0EB524BAFC918 //log(1/frcpa(1+ 178/256)
+data8 0x3FE0FFB54213A476 //log(1/frcpa(1+ 179/256)
+data8 0x3FE114253DA97D9F //log(1/frcpa(1+ 180/256)
+data8 0x3FE128A24F1D9AFF //log(1/frcpa(1+ 181/256)
+data8 0x3FE1365252BF0865 //log(1/frcpa(1+ 182/256)
+data8 0x3FE14AE558B4A92D //log(1/frcpa(1+ 183/256)
+data8 0x3FE15F85A19C765B //log(1/frcpa(1+ 184/256)
+data8 0x3FE16D4D38C119FA //log(1/frcpa(1+ 185/256)
+data8 0x3FE18203C20DD133 //log(1/frcpa(1+ 186/256)
+data8 0x3FE196C7BC4B1F3B //log(1/frcpa(1+ 187/256)
+data8 0x3FE1A4A738B7A33C //log(1/frcpa(1+ 188/256)
+data8 0x3FE1B981C0C9653D //log(1/frcpa(1+ 189/256)
+data8 0x3FE1CE69E8BB106B //log(1/frcpa(1+ 190/256)
+data8 0x3FE1DC619DE06944 //log(1/frcpa(1+ 191/256)
+data8 0x3FE1F160A2AD0DA4 //log(1/frcpa(1+ 192/256)
+data8 0x3FE2066D7740737E //log(1/frcpa(1+ 193/256)
+data8 0x3FE2147DBA47A394 //log(1/frcpa(1+ 194/256)
+data8 0x3FE229A1BC5EBAC3 //log(1/frcpa(1+ 195/256)
+data8 0x3FE237C1841A502E //log(1/frcpa(1+ 196/256)
+data8 0x3FE24CFCE6F80D9A //log(1/frcpa(1+ 197/256)
+data8 0x3FE25B2C55CD5762 //log(1/frcpa(1+ 198/256)
+data8 0x3FE2707F4D5F7C41 //log(1/frcpa(1+ 199/256)
+data8 0x3FE285E0842CA384 //log(1/frcpa(1+ 200/256)
+data8 0x3FE294294708B773 //log(1/frcpa(1+ 201/256)
+data8 0x3FE2A9A2670AFF0C //log(1/frcpa(1+ 202/256)
+data8 0x3FE2B7FB2C8D1CC1 //log(1/frcpa(1+ 203/256)
+data8 0x3FE2C65A6395F5F5 //log(1/frcpa(1+ 204/256)
+data8 0x3FE2DBF557B0DF43 //log(1/frcpa(1+ 205/256)
+data8 0x3FE2EA64C3F97655 //log(1/frcpa(1+ 206/256)
+data8 0x3FE3001823684D73 //log(1/frcpa(1+ 207/256)
+data8 0x3FE30E97E9A8B5CD //log(1/frcpa(1+ 208/256)
+data8 0x3FE32463EBDD34EA //log(1/frcpa(1+ 209/256)
+data8 0x3FE332F4314AD796 //log(1/frcpa(1+ 210/256)
+data8 0x3FE348D90E7464D0 //log(1/frcpa(1+ 211/256)
+data8 0x3FE35779F8C43D6E //log(1/frcpa(1+ 212/256)
+data8 0x3FE36621961A6A99 //log(1/frcpa(1+ 213/256)
+data8 0x3FE37C299F3C366A //log(1/frcpa(1+ 214/256)
+data8 0x3FE38AE2171976E7 //log(1/frcpa(1+ 215/256)
+data8 0x3FE399A157A603E7 //log(1/frcpa(1+ 216/256)
+data8 0x3FE3AFCCFE77B9D1 //log(1/frcpa(1+ 217/256)
+data8 0x3FE3BE9D503533B5 //log(1/frcpa(1+ 218/256)
+data8 0x3FE3CD7480B4A8A3 //log(1/frcpa(1+ 219/256)
+data8 0x3FE3E3C43918F76C //log(1/frcpa(1+ 220/256)
+data8 0x3FE3F2ACB27ED6C7 //log(1/frcpa(1+ 221/256)
+data8 0x3FE4019C2125CA93 //log(1/frcpa(1+ 222/256)
+data8 0x3FE4181061389722 //log(1/frcpa(1+ 223/256)
+data8 0x3FE42711518DF545 //log(1/frcpa(1+ 224/256)
+data8 0x3FE436194E12B6BF //log(1/frcpa(1+ 225/256)
+data8 0x3FE445285D68EA69 //log(1/frcpa(1+ 226/256)
+data8 0x3FE45BCC464C893A //log(1/frcpa(1+ 227/256)
+data8 0x3FE46AED21F117FC //log(1/frcpa(1+ 228/256)
+data8 0x3FE47A1527E8A2D3 //log(1/frcpa(1+ 229/256)
+data8 0x3FE489445EFFFCCC //log(1/frcpa(1+ 230/256)
+data8 0x3FE4A018BCB69835 //log(1/frcpa(1+ 231/256)
+data8 0x3FE4AF5A0C9D65D7 //log(1/frcpa(1+ 232/256)
+data8 0x3FE4BEA2A5BDBE87 //log(1/frcpa(1+ 233/256)
+data8 0x3FE4CDF28F10AC46 //log(1/frcpa(1+ 234/256)
+data8 0x3FE4DD49CF994058 //log(1/frcpa(1+ 235/256)
+data8 0x3FE4ECA86E64A684 //log(1/frcpa(1+ 236/256)
+data8 0x3FE503C43CD8EB68 //log(1/frcpa(1+ 237/256)
+data8 0x3FE513356667FC57 //log(1/frcpa(1+ 238/256)
+data8 0x3FE522AE0738A3D8 //log(1/frcpa(1+ 239/256)
+data8 0x3FE5322E26867857 //log(1/frcpa(1+ 240/256)
+data8 0x3FE541B5CB979809 //log(1/frcpa(1+ 241/256)
+data8 0x3FE55144FDBCBD62 //log(1/frcpa(1+ 242/256)
+data8 0x3FE560DBC45153C7 //log(1/frcpa(1+ 243/256)
+data8 0x3FE5707A26BB8C66 //log(1/frcpa(1+ 244/256)
+data8 0x3FE587F60ED5B900 //log(1/frcpa(1+ 245/256)
+data8 0x3FE597A7977C8F31 //log(1/frcpa(1+ 246/256)
+data8 0x3FE5A760D634BB8B //log(1/frcpa(1+ 247/256)
+data8 0x3FE5B721D295F10F //log(1/frcpa(1+ 248/256)
+data8 0x3FE5C6EA94431EF9 //log(1/frcpa(1+ 249/256)
+data8 0x3FE5D6BB22EA86F6 //log(1/frcpa(1+ 250/256)
+data8 0x3FE5E6938645D390 //log(1/frcpa(1+ 251/256)
+data8 0x3FE5F673C61A2ED2 //log(1/frcpa(1+ 252/256)
+data8 0x3FE6065BEA385926 //log(1/frcpa(1+ 253/256)
+data8 0x3FE6164BFA7CC06B //log(1/frcpa(1+ 254/256)
+data8 0x3FE62643FECF9743 //log(1/frcpa(1+ 255/256)
+LOCAL_OBJECT_END(log_table_3)
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(acoshf)
+// acoshf: argument x arrives in f8, result is returned in f8; special cases exit early
+{ .mfi
+ getf.exp acosh_GR_f8 = f8 // Sign and biased exponent of x
+ fclass.m p6,p0 = f8, 0xc3 // Test for x = NaN
+ mov log_GR_comp2 = 0x10032 // Biased exponent of 2^51 (0xffff + 51)
+}
+{ .mfi
+ addl NR_table_address = @ltoff(log_table_1), gp // Linkage-table slot of log_table_1
+ fms.s1 log_y = f8, f8, f1 // y = x^2-1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.sig acosh_GR_f8_sig = f8 // Significand of x
+ fclass.m p11,p0 = f8, 0x21 // Test for x=+inf
+ mov log_GR_exp_17_ones = 0x1ffff // Mask for the 17-bit exponent field
+}
+{ .mfi
+ ld8 NR_table_address = [NR_table_address] // Address of log_table_1
+ fms.s1 log_w = f8,f1,f1 // w = x - 1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.lt.s1 p7,p8 = f8, f1 // Test for x<1.0
+ addl log_GR_comp = 0x10020C,r0 // Upper 21 bits of signif of 1.0005
+}
+{ .mfb
+ mov log_GR_exp_16_ones = 0xffff //BIAS
+(p6) fma.s.s0 f8 = f8,f1,f0 // quietize nan result if x=nan
+(p6) br.ret.spnt b0 // Exit for x=nan
+}
+;;
+
+{ .mfb
+ //get second table address
+ adds log_table_address2 = 0x20, NR_table_address
+ fcmp.eq.s1 p10,p0 = f8, f1 // Test for x=+1.0
+(p11) br.ret.spnt b0 // Exit for x=+inf, f8 still holds x
+}
+;;
+
+{ .mfi
+ ldfpd NR1,NR2 = [log_table_address2],16 // Newton-Raphson constants, used below as 0.5 and 3
+ frsqrta.s1 log_y_rs,p0 = log_y // z=1/sqrt(y)
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s1 log_arg = f8,f1,f8 // 2*x, the log argument used by path 4
+(p7) br.cond.spnt ACOSH_LESS_ONE // Branch if path 7, x < 1.0
+}
+;;
+
+{ .mfi
+ ldfe log_C2 = [log_table_address2],16
+(p8) fcmp.eq.s0 p6,p0 = f8, f0 // Dummy op sets denorm flag if unorm>=1.0
+ nop.i 0
+}
+{ .mfb
+(p8) cmp.le.unc p13,p0 = log_GR_comp2,acosh_GR_f8 // p13 if exponent of x >= 51
+ nop.f 0
+(p13) br.cond.spnt LOG_COMMON1 // Branch if path 4, x >= 2^51
+}
+;;
+
+{ .mfi
+ ldfe log_C1 = [log_table_address2],16
+(p10) fmerge.s f8 = f0, f0 // Return 0 if x=1.0
+ shr.u acosh_GR_f8_sig = acosh_GR_f8_sig,43 // Keep upper 21 bits of the significand
+}
+{ .mib
+ cmp.eq p14,p0 = log_GR_exp_16_ones,acosh_GR_f8 // p14 if exponent of x equals the bias (1.0 <= x < 2.0)
+ nop.i 0
+(p10) br.ret.spnt b0 // Exit for x=1.0
+}
+;;
+
+{ .mfi
+ ldfe log_C0 = [log_table_address2],16
+ frsqrta.s1 acosh_w_rs,p0 = log_w // t=1/sqrt(w)
+ nop.i 0
+}
+{ .mfb
+(p14) cmp.lt.unc p15,p0 = acosh_GR_f8_sig,log_GR_comp // p15 if x < 1.0005
+ nop.f 0
+(p15) br.cond.spnt ACOSH_NEAR_ONE // Branch if path 2, 1.0 < x < 1.0005
+}
+;;
+
+// Here is main path, 1.0005 <= x < 2^51
+/////////////// The first iteration //////////////////////////////////
+{ .mfi
+ ldfpd log_P3,log_P2 = [NR_table_address],16 // Polynomial coefficients P3, P2
+ fma.s1 log_y_rs_iter = log_y_rs,log_y,f0 // y*z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfpd log_P1,log2 = [NR_table_address],16 // Coefficient P1 and the log2 constant
+ fnma.s1 log_y_rs_iter2 = log_y_rs_iter,log_y_rs,NR2 // 3-(y*z)*z
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs,NR1,f0 // 0.5*z
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // (0.5*z)*(3-(y*z)*z)
+ fma.s1 log_y_rs_iter = log_y_rs_iter1,log_y_rs_iter2,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (0.5*z)*(3-(y*z)*z)
+ fma.s1 log_arg_early = log_y_rs_iter1,log_y_rs_iter2,f0
+ nop.i 0
+}
+;;
+
+/////////////////////////// The second iteration /////////////////////////////
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs = log_y_rs_iter,log_y,f0 // y*z1, z1 = refined z from the first iteration
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs_iter,NR1,f0 // 0.5*z1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_arg_early = log_arg_early,log_y,f8 // early log argument: x + y*z1 ~ x + sqrt(y)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 log_y_rs = log_y_rs,log_y_rs_iter,NR2 // 3-(y*z1)*z1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_y_rs_iter1 = log_y_rs_iter1,log_y,f0 // (0.5*z1)*y
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ frcpa.s1 log_C,p0 = f1,log_arg_early // C = frcpa(early log argument)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp log_GR_signexp_f8 = log_arg_early // Sign and biased exponent of early log argument
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.sig log_GR_significand_f8 = log_arg_early // Significand of early log argument
+ fma.s1 log_arg = log_y_rs_iter1,log_y_rs,f8 // x + ((0.5*z1)*y)*(3-(y*z1)*z1) ~ x + sqrt(y)
+ adds log_table_address3 = 0x40, NR_table_address // Base address for the log_T lookup below
+}
+;;
+
+///////////////////////////////// The end NR iterations /////////////////////
+
+{ .mmi
+ //sign bit destruction (keep only the exponent field)
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
+;;
+ //BIAS subtraction
+ sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.sig log_int_Nfloat = log_GR_true_exp_f8 // N = unbiased exponent, as an integer in an FP register
+ fms.s1 log_r = log_C,log_arg,f1 // C = frcpa(x); r = C * x - 1
+ extr.u log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits
+}
+;;
+
+{ .mmi
+ //pre-index*8 + index
+ shladd log_table_address3 = log_GR_index,3,log_table_address3
+;;
+ ldfd log_T = [log_table_address3] // T = 8-byte table entry selected by the index
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rsq = log_r, log_r, f0 //r^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p10 = log_P1, log_r, f1 //P1*r + 1.0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //convert N to the floating-point format log_Nfloat
+ fcvt.xf log_Nfloat = log_int_Nfloat
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //(P3*r + P2)*r^2 + P1*r + 1.0
+ fma.s1 log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = log_rp_p2,log_r,log_T_plus_Nlog2 // result = poly(r)*r + N*log2 + T
+ br.ret.sptk b0 // Exit main path, path 3: 1.0005 <= x < 2^51
+}
+;;
+
+// Here if path 2, 1.0 < x < 1.0005
+ACOSH_NEAR_ONE:
+// The first NR iteration
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter1 = acosh_w_rs,log_w,f0 //t*w
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_1 = f8,log_C2,log_C1 //x*C2 + C1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_rs,NR1,f0 //t*0.5
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 acosh_w_iter1 = acosh_w_iter1,acosh_w_rs,NR2 //3-t*t*w
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //(3-t*t*w)*t*0.5
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_1 = acosh_w_1,log_w,log_C0 //(x*C2 + C1)*(x-1) + C0
+ nop.i 0
+}
+;;
+
+// The second NR iteration
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_rs = acosh_w_iter2,log_w,f0 //t*w
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 acosh_w_iter1 = acosh_w_iter2,acosh_w_rs,NR2 //3-t*t*w
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,NR1,f0 //t*0.5
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_iter2 = acosh_w_iter2,acosh_w_iter1,f0 //(3-t*t*w)*t*0.5
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 acosh_w_sqrt = acosh_w_iter2,log_w,f0 //t*w ~ sqrt(w) = sqrt(x-1)
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = acosh_w_1,acosh_w_sqrt,f0 // result = ((x*C2 + C1)*(x-1) + C0) * sqrt(x-1)
+ br.ret.sptk b0 // Exit path 2, 1.0 < x < 1.0005
+}
+;;
+
+// Here if path 4, x >= 2^51; the log is taken of log_arg = 2*x (set before the branch)
+LOG_COMMON1:
+{ .mfi
+ ldfpd log_P3,log_P2 = [NR_table_address],16 // Polynomial coefficients P3, P2
+ frcpa.s1 log_C,p0 = f1,log_arg // C = frcpa(2*x)
+ nop.i 0
+}
+;;
+
+{ .mmi
+ getf.exp log_GR_signexp_f8 = log_arg // Sign and biased exponent of 2*x
+ ldfpd log_P1,log2 = [NR_table_address],16 // Coefficient P1 and the log2 constant
+ nop.i 0
+}
+;;
+
+{ .mmi
+ getf.sig log_GR_significand_f8 = log_arg // Significand of 2*x
+ nop.m 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ adds log_table_address3 = 0x40, NR_table_address // Base address for the log_T lookup below
+ nop.f 0
+ //sign bit destruction (keep only the exponent field)
+ and log_GR_exp_f8 = log_GR_signexp_f8, log_GR_exp_17_ones
+}
+;;
+
+{ .mmf
+ nop.m 0
+ //BIAS subtraction
+ sub log_GR_true_exp_f8 = log_GR_exp_f8, log_GR_exp_16_ones
+ fms.s1 log_r = log_C,log_arg,f1 // C = frcpa(2*x); r = C * (2*x) - 1
+}
+;;
+
+{ .mfi
+ setf.sig log_int_Nfloat = log_GR_true_exp_f8 // N = unbiased exponent, as an integer in an FP register
+ nop.f 0
+ extr.u log_GR_index = log_GR_significand_f8,55,8 //Extract 8 bits
+}
+;;
+
+{ .mmi
+ //pre-index*8 + index
+ shladd log_table_address3 = log_GR_index,3,log_table_address3
+;;
+ ldfd log_T = [log_table_address3] // T = 8-byte table entry selected by the index
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rsq = log_r, log_r, f0 //r^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p32 = log_P3, log_r, log_P2 //P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p10 = log_P1, log_r, f1 //P1*r + 1.0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ //convert N to the floating-point format log_Nfloat
+ fcvt.xf log_Nfloat = log_int_Nfloat
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 log_rp_p2 = log_rp_p32, log_rsq, log_rp_p10 //(P3*r + P2)*r^2 + P1*r + 1.0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 log_T_plus_Nlog2 = log_Nfloat,log2,log_T //N*log2 + T
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = log_rp_p2,log_r,log_T_plus_Nlog2 // result = poly(r)*r + N*log2 + T
+ br.ret.sptk b0 // Exit path 4, x >= 2^51
+}
+;;
+
+// Here if path 7, x < 1.0: domain error, handed to __libm_error_region
+ACOSH_LESS_ONE:
+{ .mfi
+ alloc r32 = ar.pfs,1,3,4,0 // Frame: 1 input, 3 locals, 4 outputs, 0 rotating
+ fmerge.s f10 = f8,f8 // Copy x to f10; the error region stores f10 as Parameter 1
+ nop.i 0
+}
+;;
+
+{ .mfb
+ mov acosh_GR_tag = 137 // Error tag (presumably read by __libm_error_support; confirm against the register equates)
+ frcpa.s0 f8,p0 = f0,f0 // Default result from 0/0 (per the Itanium manual: QNaN, raises invalid)
+ br.cond.sptk __libm_error_region
+}
+;;
+
+GLOBAL_LIBM_END(acoshf)
+libm_alias_float_other (acosh, acosh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// Spills f10, f1 and f8 into a 64-byte frame, calls __libm_error_support, reloads f8 from the result slot
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 (result) address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acoshl.S b/sysdeps/ia64/fpu/e_acoshl.S
new file mode 100644
index 0000000000..f35c6bac89
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acoshl.S
@@ -0,0 +1,1712 @@
+.file "acoshl.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 10/01/01 Initial version
+// 10/10/01 Performance improved
+// 12/11/01 Changed huges_logp to not be global
+// 01/02/02 Corrected .restore syntax
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/14/02 Changed mli templates to mlx
+// 02/06/03 Reorganized data tables
+// 03/31/05 Reformatted delimiters between data tables
+//
+//*********************************************************************
+//
+// API
+//==============================================================
+// long double acoshl(long double);
+//
+// Overview of operation
+//==============================================================
+//
+// There are 6 paths:
+// 1. x = 1
+// Return acoshl(x) = 0;
+//
+// 2. x < 1
+// Return acoshl(x) = Nan (Domain error, error handler call with tag 135);
+//
+// 3. x = [S,Q]Nan or +INF
+// Return acoshl(x) = x + x;
+//
+// 4. 'Near 1': 1 < x < 1+1/8
+// Return acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)),
+// where y = x - 1, P(y)/Q(y) - rational approximation
+//
+// 5. 'Huges': x > 0.5*2^64
+// Return acoshl(x) = (logl(2*x-1));
+//
+// 6. 'Main path': 1+1/8 < x < 0.5*2^64
+// b_hi + b_lo = x + sqrt(x^2 - 1);
+// acoshl(x) = logl_special(b_hi, b_lo);
+//
+// Algorithm description
+//==============================================================
+//
+// I. Near 1 path algorithm
+// **************************************************************
+// The formula is acoshl(x) = sqrtl(2*y)*(1-P(y)/Q(y)),
+// where y = x - 1, P(y)/Q(y) - rational approximation
+//
+// 1) y = x - 1, y2 = 2 * y
+//
+// 2) Compute in parallel sqrtl(2*y) and P(y)/Q(y)
+// a) sqrtl computation method described below (main path algorithm, item 2))
+// As result we obtain (gg+gl) - multiprecision result
+// as pair of double extended values
+// b) P(y) and Q(y) calculated without any extra precision manipulations
+// c) P/Q division:
+// y = frcpa(Q) initial approximation of 1/Q
+// z = P*y initial approximation of P/Q
+//
+// e = 1 - Q*y
+// e2 = e + e^2
+// e1 = e^2
+// y1 = y + y*e2 = y + y*(e+e^2)
+//
+// e3 = e + e1^2
+// y2 = y + y1*e3 = y + y*(e+e^2+..+e^6)
+//
+// r = P - Q*z
+// e4 = 1 - Q*y2
+// xx = z + r*y2 high part of a/b
+//
+// y3 = y2 + y2*e4
+// r1 = P - Q*xx
+// xl = r1*y3 low part of a/b
+//
+// 3) res = sqrt(2*y) - sqrt(2*y)*(P(y)/Q(y)) =
+// = (gg+gl) - (gg + gl)*(xx+xl);
+//
+// a) hh = gg*xx; hl = gg*xl; lh = gl*xx; ll = gl*xl;
+// b) res = ((((gl + ll) + lh) + hl) + hh) + gg;
+// (exactly in this order)
+//
+// II. Main path algorithm
+// ( thanks to Peter Markstein for the idea of sqrt(x^2+1) computation! )
+// **********************************************************************
+//
+// There are 3 parts of x+sqrt(x^2-1) computation:
+//
+// 1) m2 = (m2_hi+m2_lo) = x^2-1 obtaining
+// ------------------------------------
+// m2_hi = x2_hi - 1, where x2_hi = x * x;
+// m2_lo = x2_lo + p1_lo, where
+// x2_lo = FMS(x*x-x2_hi),
+// p1_lo = (1 + m2_hi) - x2_hi;
+//
+// 2) g = (g_hi+g_lo) = sqrt(m2) = sqrt(m2_hi+m2_lo)
+// ----------------------------------------------
+// r = invsqrt(m2_hi) (8-bit reciprocal square root approximation);
+// g = m2_hi * r (first 8 bit-approximation of sqrt);
+//
+// h = 0.5 * r;
+// e = 0.5 - g * h;
+// g = g * e + g (second 16 bit-approximation of sqrt);
+//
+// h = h * e + h;
+// e = 0.5 - g * h;
+// g = g * e + g (third 32 bit-approximation of sqrt);
+//
+// h = h * e + h;
+// e = 0.5 - g * h;
+// g_hi = g * e + g (fourth 64 bit-approximation of sqrt);
+//
+// Remainder computation:
+// h = h * e + h;
+// d = (m2_hi - g_hi * g_hi) + m2_lo;
+// g_lo = d * h;
+//
+// 3) b = (b_hi + b_lo) = x + g, where g = (g_hi + g_lo) = sqrt(x^2-1)
+// -------------------------------------------------------------------
+// b_hi = (g_hi + x) + gl;
+// b_lo = (x - b_hi) + g_hi + gl;
+//
+// Now we pass b presented as sum b_hi + b_lo to special version
+// of logl function which accepts a pair of arguments as
+// multiprecision value.
+//
+// Special log algorithm overview
+// ================================
+// Here we use a table lookup method. The basic idea is that in
+// order to compute logl(Arg) for an argument Arg in [1,2),
+// we construct a value G such that G*Arg is close to 1 and that
+// logl(1/G) is obtainable easily from a table of values calculated
+// beforehand. Thus
+//
+// logl(Arg) = logl(1/G) + logl(1 + (G*Arg - 1))
+//
+// Because |G*Arg - 1| is small, the second term on the right hand
+// side can be approximated by a short polynomial. We elaborate
+// this method in four steps.
+//
+// Step 0: Initialization
+//
+// We need to calculate logl( X ). Obtain N, S_hi such that
+//
+// X = 2^N * ( S_hi + S_lo ) exactly
+//
+// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+// that |S_lo| <= ulp(S_hi).
+//
+// For the special version of logl: S_lo = b_lo
+// !-----------------------------------------------!
+//
+// Step 1: Argument Reduction
+//
+// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
+//
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1) + G * S_lo
+//
+// These G_j's have the property that the product is exactly
+// representable and that |r| < 2^(-12) as a result.
+//
+// Step 2: Approximation
+//
+// logl(1 + r) is approximated by a short polynomial poly(r).
+//
+// Step 3: Reconstruction
+//
+// Finally, logl( X ) is given by
+//
+// logl( X ) = logl( 2^N * (S_hi + S_lo) )
+// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
+// ~=~ N*logl(2) + logl(1/G) + poly(r).
+//
+// For detailed description see logl or log1pl function, regular path.
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f32 -> f95 (64 registers)
+
+// General registers used:
+// r32 -> r67 (36 registers)
+
+// Predicate registers used:
+// p7 -> p11
+// p7 for 'NaNs, Inf' path
+// p8 for 'near 1' path
+// p9 for 'huges' path
+// p10 for x = 1
+// p11 for x < 1
+//
+//*********************************************************************
+// IEEE Special Conditions:
+//
+// acoshl(+inf) = +inf
+// acoshl(-inf) = QNaN
+// acoshl(1) = 0
+// acoshl(x<1) = QNaN
+// acoshl(SNaN) = QNaN
+// acoshl(QNaN) = QNaN
+//
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 64
+
+// Near 1 path rational approximation coefficients
+LOCAL_OBJECT_START(Poly_P)
+data8 0xB0978143F695D40F, 0x3FF1 // .84205539791447100108478906277453574946e-4
+data8 0xB9800D841A8CAD29, 0x3FF6 // .28305085180397409672905983082168721069e-2
+data8 0xC889F455758C1725, 0x3FF9 // .24479844297887530847660233111267222945e-1
+data8 0x9BE1DFF006F45F12, 0x3FFB // .76114415657565879842941751209926938306e-1
+data8 0x9E34AF4D372861E0, 0x3FFB // .77248925727776366270605984806795850504e-1
+data8 0xF3DC502AEE14C4AE, 0x3FA6 // .3077953476682583606615438814166025592e-26
+LOCAL_OBJECT_END(Poly_P)
+
+// Near 1 path rational approximation coefficients for Q(y)
+LOCAL_OBJECT_START(Poly_Q)
+data8 0xF76E3FD3C7680357, 0x3FF1 // .11798413344703621030038719253730708525e-3
+data8 0xD107D2E7273263AE, 0x3FF7 // .63791065024872525660782716786703188820e-2
+data8 0xB609BE5CDE206AEF, 0x3FFB // .88885771950814004376363335821980079985e-1
+data8 0xF7DEACAC28067C8A, 0x3FFD // .48412074662702495416825113623936037072302
+data8 0x8F9BE5890CEC7E38, 0x3FFF // 1.1219450873557867470217771071068369729526
+data8 0xED4F06F3D2BC92D1, 0x3FFE // .92698710873331639524734537734804056798748
+LOCAL_OBJECT_END(Poly_Q)
+
+// Q coeffs
+LOCAL_OBJECT_START(Constants_Q)
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000
+data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000
+data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000
+data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000
+data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000
+LOCAL_OBJECT_END(Constants_Q)
+
+// Z1 - 16 bit fixed
+LOCAL_OBJECT_START(Constants_Z_1)
+data4 0x00008000
+data4 0x00007879
+data4 0x000071C8
+data4 0x00006BCB
+data4 0x00006667
+data4 0x00006187
+data4 0x00005D18
+data4 0x0000590C
+data4 0x00005556
+data4 0x000051EC
+data4 0x00004EC5
+data4 0x00004BDB
+data4 0x00004925
+data4 0x0000469F
+data4 0x00004445
+data4 0x00004211
+LOCAL_OBJECT_END(Constants_Z_1)
+
+// G1 and H1 - IEEE single and h1 - IEEE double
+LOCAL_OBJECT_START(Constants_G_H_h1)
+data4 0x3F800000,0x00000000
+data8 0x0000000000000000
+data4 0x3F70F0F0,0x3D785196
+data8 0x3DA163A6617D741C
+data4 0x3F638E38,0x3DF13843
+data8 0x3E2C55E6CBD3D5BB
+data4 0x3F579430,0x3E2FF9A0
+data8 0xBE3EB0BFD86EA5E7
+data4 0x3F4CCCC8,0x3E647FD6
+data8 0x3E2E6A8C86B12760
+data4 0x3F430C30,0x3E8B3AE7
+data8 0x3E47574C5C0739BA
+data4 0x3F3A2E88,0x3EA30C68
+data8 0x3E20E30F13E8AF2F
+data4 0x3F321640,0x3EB9CEC8
+data8 0xBE42885BF2C630BD
+data4 0x3F2AAAA8,0x3ECF9927
+data8 0x3E497F3497E577C6
+data4 0x3F23D708,0x3EE47FC5
+data8 0x3E3E6A6EA6B0A5AB
+data4 0x3F1D89D8,0x3EF8947D
+data8 0xBDF43E3CD328D9BE
+data4 0x3F17B420,0x3F05F3A1
+data8 0x3E4094C30ADB090A
+data4 0x3F124920,0x3F0F4303
+data8 0xBE28FBB2FC1FE510
+data4 0x3F0D3DC8,0x3F183EBF
+data8 0x3E3A789510FDE3FA
+data4 0x3F088888,0x3F20EC80
+data8 0x3E508CE57CC8C98F
+data4 0x3F042108,0x3F29516A
+data8 0xBE534874A223106C
+LOCAL_OBJECT_END(Constants_G_H_h1)
+
+// Z2 - 16 bit fixed
+LOCAL_OBJECT_START(Constants_Z_2)
+data4 0x00008000
+data4 0x00007F81
+data4 0x00007F02
+data4 0x00007E85
+data4 0x00007E08
+data4 0x00007D8D
+data4 0x00007D12
+data4 0x00007C98
+data4 0x00007C20
+data4 0x00007BA8
+data4 0x00007B31
+data4 0x00007ABB
+data4 0x00007A45
+data4 0x000079D1
+data4 0x0000795D
+data4 0x000078EB
+LOCAL_OBJECT_END(Constants_Z_2)
+
+// G2 and H2 - IEEE single and h2 - IEEE double
+LOCAL_OBJECT_START(Constants_G_H_h2)
+data4 0x3F800000,0x00000000
+data8 0x0000000000000000
+data4 0x3F7F00F8,0x3B7F875D
+data8 0x3DB5A11622C42273
+data4 0x3F7E03F8,0x3BFF015B
+data8 0x3DE620CF21F86ED3
+data4 0x3F7D08E0,0x3C3EE393
+data8 0xBDAFA07E484F34ED
+data4 0x3F7C0FC0,0x3C7E0586
+data8 0xBDFE07F03860BCF6
+data4 0x3F7B1880,0x3C9E75D2
+data8 0x3DEA370FA78093D6
+data4 0x3F7A2328,0x3CBDC97A
+data8 0x3DFF579172A753D0
+data4 0x3F792FB0,0x3CDCFE47
+data8 0x3DFEBE6CA7EF896B
+data4 0x3F783E08,0x3CFC15D0
+data8 0x3E0CF156409ECB43
+data4 0x3F774E38,0x3D0D874D
+data8 0xBE0B6F97FFEF71DF
+data4 0x3F766038,0x3D1CF49B
+data8 0xBE0804835D59EEE8
+data4 0x3F757400,0x3D2C531D
+data8 0x3E1F91E9A9192A74
+data4 0x3F748988,0x3D3BA322
+data8 0xBE139A06BF72A8CD
+data4 0x3F73A0D0,0x3D4AE46F
+data8 0x3E1D9202F8FBA6CF
+data4 0x3F72B9D0,0x3D5A1756
+data8 0xBE1DCCC4BA796223
+data4 0x3F71D488,0x3D693B9D
+data8 0xBE049391B6B7C239
+LOCAL_OBJECT_END(Constants_G_H_h2)
+
+// 32 entries of 16 bytes: G3 and H3 - IEEE single, then h3 - IEEE double
+LOCAL_OBJECT_START(Constants_G_H_h3)
+data4 0x3F7FFC00,0x38800100
+data8 0x3D355595562224CD
+data4 0x3F7FF400,0x39400480
+data8 0x3D8200A206136FF6
+data4 0x3F7FEC00,0x39A00640
+data8 0x3DA4D68DE8DE9AF0
+data4 0x3F7FE400,0x39E00C41
+data8 0xBD8B4291B10238DC
+data4 0x3F7FDC00,0x3A100A21
+data8 0xBD89CCB83B1952CA
+data4 0x3F7FD400,0x3A300F22
+data8 0xBDB107071DC46826
+data4 0x3F7FCC08,0x3A4FF51C
+data8 0x3DB6FCB9F43307DB
+data4 0x3F7FC408,0x3A6FFC1D
+data8 0xBD9B7C4762DC7872
+data4 0x3F7FBC10,0x3A87F20B
+data8 0xBDC3725E3F89154A
+data4 0x3F7FB410,0x3A97F68B
+data8 0xBD93519D62B9D392
+data4 0x3F7FAC18,0x3AA7EB86
+data8 0x3DC184410F21BD9D
+data4 0x3F7FA420,0x3AB7E101
+data8 0xBDA64B952245E0A6
+data4 0x3F7F9C20,0x3AC7E701
+data8 0x3DB4B0ECAABB34B8
+data4 0x3F7F9428,0x3AD7DD7B
+data8 0x3D9923376DC40A7E
+data4 0x3F7F8C30,0x3AE7D474
+data8 0x3DC6E17B4F2083D3
+data4 0x3F7F8438,0x3AF7CBED
+data8 0x3DAE314B811D4394
+data4 0x3F7F7C40,0x3B03E1F3
+data8 0xBDD46F21B08F2DB1
+data4 0x3F7F7448,0x3B0BDE2F
+data8 0xBDDC30A46D34522B
+data4 0x3F7F6C50,0x3B13DAAA
+data8 0x3DCB0070B1F473DB
+data4 0x3F7F6458,0x3B1BD766
+data8 0xBDD65DDC6AD282FD
+data4 0x3F7F5C68,0x3B23CC5C
+data8 0xBDCDAB83F153761A
+data4 0x3F7F5470,0x3B2BC997
+data8 0xBDDADA40341D0F8F
+data4 0x3F7F4C78,0x3B33C711
+data8 0x3DCD1BD7EBC394E8
+data4 0x3F7F4488,0x3B3BBCC6
+data8 0xBDC3532B52E3E695
+data4 0x3F7F3C90,0x3B43BAC0
+data8 0xBDA3961EE846B3DE
+data4 0x3F7F34A0,0x3B4BB0F4
+data8 0xBDDADF06785778D4
+data4 0x3F7F2CA8,0x3B53AF6D
+data8 0x3DCC3ED1E55CE212
+data4 0x3F7F24B8,0x3B5BA620
+data8 0xBDBA31039E382C15
+data4 0x3F7F1CC8,0x3B639D12
+data8 0x3D635A0B5C5AF197
+data4 0x3F7F14D8,0x3B6B9444
+data8 0xBDDCCB1971D34EFC
+data4 0x3F7F0CE0,0x3B7393BC
+data8 0x3DC7450252CD7ADA
+data4 0x3F7F04F0,0x3B7B8B6D
+data8 0xBDB68F177D7F2A42
+LOCAL_OBJECT_END(Constants_G_H_h3)
+
+// Assembly macros: symbolic names for the registers used below
+//==============================================================
+
+// Floating Point Registers
+
+FR_Arg = f8
+FR_Res = f8
+
+
+FR_PP0 = f32
+FR_PP1 = f33
+FR_PP2 = f34
+FR_PP3 = f35
+FR_PP4 = f36
+FR_PP5 = f37
+FR_QQ0 = f38
+FR_QQ1 = f39
+FR_QQ2 = f40
+FR_QQ3 = f41
+FR_QQ4 = f42
+FR_QQ5 = f43
+
+FR_Q1 = f44
+FR_Q2 = f45
+FR_Q3 = f46
+FR_Q4 = f47
+
+FR_Half = f48
+FR_Two = f49
+
+FR_log2_hi = f50
+FR_log2_lo = f51
+
+
+FR_X2 = f52
+FR_M2 = f53
+FR_M2L = f54
+FR_Rcp = f55
+FR_GG = f56
+FR_HH = f57
+FR_EE = f58
+FR_DD = f59
+FR_GL = f60
+FR_Tmp = f61
+
+
+FR_XM1 = f62
+FR_2XM1 = f63
+FR_XM12 = f64
+
+
+
+ // Special logl registers (main and huges_logl paths)
+FR_XLog_Hi = f65
+FR_XLog_Lo = f66
+
+FR_Y_hi = f67
+FR_Y_lo = f68
+
+FR_S_hi = f69
+FR_S_lo = f70
+
+FR_poly_lo = f71
+FR_poly_hi = f72
+
+FR_G = f73
+FR_H = f74
+FR_h = f75
+
+FR_G2 = f76
+FR_H2 = f77
+FR_h2 = f78
+
+FR_r = f79
+FR_rsq = f80
+FR_rcub = f81
+
+FR_float_N = f82
+
+FR_G3 = f83
+FR_H3 = f84
+FR_h3 = f85
+
+FR_2_to_minus_N = f86
+
+
+ // Near 1 registers (numbers overlap the logl registers above)
+FR_PP = f65
+FR_QQ = f66
+
+
+FR_PV6 = f69
+FR_PV4 = f70
+FR_PV3 = f71
+FR_PV2 = f72
+
+FR_QV6 = f73
+FR_QV4 = f74
+FR_QV3 = f75
+FR_QV2 = f76
+
+FR_Y0 = f77
+FR_Q0 = f78
+FR_E0 = f79
+FR_E2 = f80
+FR_E1 = f81
+FR_Y1 = f82
+FR_E3 = f83
+FR_Y2 = f84
+FR_R0 = f85
+FR_E4 = f86
+FR_Y3 = f87
+FR_R1 = f88
+FR_X_Hi = f89
+FR_X_lo = f90
+
+FR_HH = f91 // NOTE(review): FR_HH was already set to f57 above - confirm
+FR_LL = f92
+FR_HL = f93
+FR_LH = f94
+
+
+
+ // Error handler registers
+FR_Arg_X = f95
+FR_Arg_Y = f0
+
+
+// General Purpose Registers
+
+ // General prolog registers
+GR_PFS = r32
+GR_OneP125 = r33
+GR_TwoP63 = r34
+GR_Arg = r35
+GR_Half = r36
+
+ // Near 1 path registers (P and Q coefficient table addresses)
+GR_Poly_P = r37
+GR_Poly_Q = r38
+
+ // Special logl registers
+GR_Index1 = r39
+GR_Index2 = r40
+GR_signif = r41
+GR_X_0 = r42
+GR_X_1 = r43
+GR_X_2 = r44
+GR_minus_N = r45
+GR_Z_1 = r46
+GR_Z_2 = r47
+GR_N = r48
+GR_Bias = r49
+GR_M = r50
+GR_Index3 = r51
+GR_exp_2tom80 = r52
+GR_exp_mask = r53
+GR_exp_2tom7 = r54
+GR_ad_ln10 = r55
+GR_ad_tbl_1 = r56
+GR_ad_tbl_2 = r57
+GR_ad_tbl_3 = r58
+GR_ad_q = r59
+GR_ad_z_1 = r60
+GR_ad_z_2 = r61
+GR_ad_z_3 = r62
+
+//
+// Added for unwind support
+// (r32-r34 are shared with the general prolog registers above)
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+
+GR_Parameter_X = r64 // r64-r67: output registers (alloc 0,32,4,0)
+GR_Parameter_Y = r65
+GR_Parameter_RESULT = r66
+GR_Parameter_TAG = r67
+
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(acoshl)
+// acoshl: argument in FR_Arg (f8), result in FR_Res (f8)
+{ .mfi
+ alloc GR_PFS = ar.pfs,0,32,4,0 // Local frame allocation
+ fcmp.lt.s1 p11, p0 = FR_Arg, f1 // if arg is less than 1
+ mov GR_Half = 0xfffe // 0.5's exp
+}
+{ .mfi
+ addl GR_Poly_Q = @ltoff(Poly_Q), gp // Address of Q-coeff table
+ fma.s1 FR_X2 = FR_Arg, FR_Arg, f0 // Obtain x^2
+ addl GR_Poly_P = @ltoff(Poly_P), gp // Address of P-coeff table
+};;
+
+{ .mfi
+ getf.d GR_Arg = FR_Arg // get argument as double (int64)
+ fma.s0 FR_Two = f1, f1, f1 // construct 2.0
+ addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp // logl tables
+}
+{ .mlx
+ nop.m 0
+ movl GR_TwoP63 = 0x43E8000000000000 // 1.5*2^63 as double bits (huge arguments)
+};;
+
+{ .mfi
+ ld8 GR_Poly_P = [GR_Poly_P] // get actual P-coeff table address
+ fcmp.eq.s1 p10, p0 = FR_Arg, f1 // if arg == 1 (return 0)
+ nop.i 0
+}
+{ .mlx
+ ld8 GR_Poly_Q = [GR_Poly_Q] // get actual Q-coeff table address
+ movl GR_OneP125 = 0x3FF2000000000000 // 1.125 (near 1 path bound)
+};;
+
+{ .mfi
+ ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1
+ fclass.m p7,p0 = FR_Arg, 0xe3 // if arg NaN inf
+ cmp.le p9, p0 = GR_TwoP63, GR_Arg // if arg >= 1.5*2^63 ('huges')
+}
+{ .mfb
+ cmp.ge p8, p0 = GR_OneP125, GR_Arg // if arg<=1.125 -near 1 path
+ fms.s1 FR_XM1 = FR_Arg, f1, f1 // X0 = X-1 (for near 1 path)
+(p11) br.cond.spnt acoshl_lt_pone // error branch (less than 1)
+};;
+
+{ .mmi
+ setf.exp FR_Half = GR_Half // construct 0.5
+(p9) setf.s FR_XLog_Lo = r0 // Low of logl arg=0 (Huges path)
+ mov GR_exp_mask = 0x1FFFF // Create exponent mask
+};;
+
+{ .mmf
+(p8) ldfe FR_PP5 = [GR_Poly_P],16 // Load P5
+(p8) ldfe FR_QQ5 = [GR_Poly_Q],16 // Load Q5
+ fms.s1 FR_M2 = FR_X2, f1, f1 // m2 = x^2 - 1
+};;
+
+{ .mfi
+(p8) ldfe FR_QQ4 = [GR_Poly_Q],16 // Load Q4
+ fms.s1 FR_M2L = FR_Arg, FR_Arg, FR_X2 // low part of x^2:
+ // m2l = X*X - X2 (fused)
+ add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1
+}
+{ .mfb
+(p8) ldfe FR_PP4 = [GR_Poly_P],16 // Load P4
+(p7) fma.s0 FR_Res = FR_Arg,f1,FR_Arg // r = a + a (Nan, Inf)
+(p7) br.ret.spnt b0 // return (Nan, Inf)
+};;
+
+{ .mfi
+(p8) ldfe FR_PP3 = [GR_Poly_P],16 // Load P3
+ nop.f 0
+ add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_P
+}
+{ .mfb
+(p8) ldfe FR_QQ3 = [GR_Poly_Q],16 // Load Q3
+(p9) fms.s1 FR_XLog_Hi = FR_Two, FR_Arg, f1 // Hi of log arg = 2*X-1
+(p9) br.cond.spnt huges_logl // special version of log
+}
+;;
+
+{ .mfi
+(p8) ldfe FR_PP2 = [GR_Poly_P],16 // Load P2
+(p8) fma.s1 FR_2XM1 = FR_Two, FR_XM1, f0 // 2X0 = 2 * X0
+ add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
+}
+{ .mfb
+(p8) ldfe FR_QQ2 = [GR_Poly_Q],16 // Load Q2
+(p10) fma.s0 FR_Res = f0,f1,f0 // r = 0 (arg = 1)
+(p10) br.ret.spnt b0 // return (arg = 1)
+};;
+
+{ .mmi
+(p8) ldfe FR_PP1 = [GR_Poly_P],16 // Load P1
+(p8) ldfe FR_QQ1 = [GR_Poly_Q],16 // Load Q1
+ add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2
+}
+;;
+
+{ .mfi
+(p8) ldfe FR_PP0 = [GR_Poly_P] // Load P0
+ fma.s1 FR_Tmp = f1, f1, FR_M2 // Tmp = 1 + m2
+ add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3
+}
+{ .mfb
+(p8) ldfe FR_QQ0 = [GR_Poly_Q]
+ nop.f 0
+(p8) br.cond.spnt near_1 // near 1 path
+};;
+{ .mfi
+ ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
+ nop.f 0
+ mov GR_Bias = 0x0FFFF // Create exponent bias
+};;
+{ .mfi
+ nop.m 0
+ frsqrta.s1 FR_Rcp, p0 = FR_M2 // Rcp = 1/sqrt(m2) reciprocal appr.
+ nop.i 0
+};;
+
+{ .mfi
+ ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
+ fms.s1 FR_Tmp = FR_X2, f1, FR_Tmp // Tmp = x^2 - Tmp
+ nop.i 0
+};;
+
+{ .mfi
+ ldfe FR_Q4 = [GR_ad_q],16 // Load Q4
+ fma.s1 FR_GG = FR_Rcp, FR_M2, f0 // g = Rcp * m2
+ // 8 bit Newton Raphson iteration
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp
+ nop.i 0
+};;
+{ .mfi
+ ldfe FR_Q3 = [GR_ad_q],16 // Load Q3
+ fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_M2L = FR_Tmp, f1, FR_M2L // low part of m2 = Tmp+m2l
+ nop.i 0
+};;
+
+{ .mfi
+ ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
+ fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
+ // 16 bit Newton Raphson iteration
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h
+ nop.i 0
+};;
+
+{ .mfi
+ ldfe FR_Q1 = [GR_ad_q] // Load Q1
+ fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
+ // 32 bit Newton Raphson iteration
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g
+ // 64 bit Newton Raphson iteration
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_DD = FR_GG, FR_GG, FR_M2 // Remainder d = m2 - g * g
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_XLog_Hi = FR_Arg, f1, FR_GG // bh = x + g
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_DD = FR_DD, f1, FR_M2L // add m2l: d = d + m2l
+ nop.i 0
+};;
+
+{ .mfi
+ getf.sig GR_signif = FR_XLog_Hi // Get significand of bh
+ nop.f 0
+ mov GR_exp_2tom7 = 0x0fff8 // Exponent of 2^-7
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GL = FR_DD, FR_HH, f0 // gl = d * h
+ extr.u GR_Index1 = GR_signif, 59, 4 // Get high 4 bits of signif
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_XLog_Hi = FR_DD, FR_HH, FR_XLog_Hi // bh = bh + gl
+ nop.i 0
+};;
+
+
+
+{ .mmi
+ shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
+ shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1 // Point to G_1
+ extr.u GR_X_0 = GR_signif, 49, 15 // Get high 15 bits of signif.
+};;
+
+{ .mmi
+ ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_XLog_Lo = FR_Arg, f1, FR_XLog_Hi // bl = x - bh
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // Get bits 30-15 of X_0 * Z_1
+};;
+
+// WE CANNOT USE GR_X_1 IN NEXT 3 CYCLES BECAUSE OF POSSIBLE 10 CLOCKS STALL!
+// "DEAD" ZONE!
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fmerge.se FR_S_hi = f1,FR_XLog_Hi // S_hi = significand of bh, in [1,2)
+ nop.i 0
+};;
+
+
+{ .mmi
+ getf.exp GR_N = FR_XLog_Hi // Get N = exponent of bh
+ ldfd FR_h = [GR_ad_tbl_1] // Load h_1
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
+};;
+
+{ .mfi
+ shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2
+ fma.s1 FR_XLog_Lo = FR_XLog_Lo, f1, FR_GG // bl = bl + gg
+ mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80
+}
+{ .mfi
+ shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
+ nop.f 0
+ sub GR_N = GR_N, GR_Bias // sub bias from exp
+};;
+
+{ .mmi
+ ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
+ ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
+ sub GR_minus_N = GR_Bias, GR_N // Form exponent of 2^(-N)
+};;
+
+{ .mmi
+ ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ setf.sig FR_float_N = GR_N // Put integer N into significand
+ setf.exp FR_2_to_minus_N = GR_minus_N // Form 2^(-N)
+ pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2
+};;
+
+// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
+// BECAUSE OF POSSIBLE 10 CLOCKS STALL!
+// (Just nops added - nothing to do here)
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_XLog_Lo = FR_XLog_Lo, f1, FR_GL // bl = bl + gl
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
+};;
+
+{ .mfi
+ shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
+ fcvt.xf FR_float_N = FR_float_N // Convert integer N to float
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_S_lo = FR_XLog_Lo, FR_2_to_minus_N, f0 //S_lo=S_lo*2^(-N)
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_r = FR_G, FR_S_hi, f1 // r = G * S_hi - 1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi=N*log2_hi+H
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h=N*log2_lo+h
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_r = FR_G, FR_S_lo, FR_r // r=G*S_lo+(G*S_hi-1)
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 // poly_lo = r * Q4 + Q3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo=poly_lo*r+Q2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r // poly_hi = Q1*rsq + r
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h//poly_lo=poly_lo*r^3+h
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo
+ // Y_lo=poly_hi+poly_lo
+ nop.i 0
+};;
+
+{ .mfb
+ nop.m 0
+ fadd.s0 FR_Res = FR_Y_lo,FR_Y_hi // Result=Y_lo+Y_hi
+ br.ret.sptk b0 // Exit for the main path
+};;
+
+
+huges_logl:
+{ .mmi
+ getf.sig GR_signif = FR_XLog_Hi // Get significand of 2*x-1
+ mov GR_exp_2tom7 = 0x0fff8 // Exponent of 2^-7
+ nop.i 0
+};;
+
+{ .mfi
+ add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1
+ nop.f 0
+ add GR_ad_q = -0x60, GR_ad_z_1 // Point to Constants_P
+}
+{ .mfi
+ add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
+ nop.f 0
+ add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2
+};;
+
+{ .mfi
+ add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3
+ nop.f 0
+ extr.u GR_Index1 = GR_signif, 59, 4 // Get high 4 bits of signif
+};;
+
+{ .mfi
+ shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
+ nop.f 0
+ extr.u GR_X_0 = GR_signif, 49, 15 // Get high 15 bits of signif.
+};;
+
+{ .mfi
+ ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
+ nop.f 0
+ mov GR_exp_mask = 0x1FFFF // Create exponent mask
+}
+{ .mfi
+ shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1 // Point to G_1
+ nop.f 0
+ mov GR_Bias = 0x0FFFF // Create exponent bias
+};;
+
+{ .mfi
+ ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
+ fmerge.se FR_S_hi = f1,FR_XLog_Hi // S_hi = significand of 2*x-1, in [1,2)
+ nop.i 0
+};;
+
+{ .mmi
+ getf.exp GR_N = FR_XLog_Hi // Get N = exponent of 2*x-1
+ ldfd FR_h = [GR_ad_tbl_1] // Load h_1
+ nop.i 0
+};;
+
+{ .mfi
+ ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
+ nop.f 0
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // Get bits 30-15 of X_0 * Z_1
+};;
+
+{ .mmi
+ ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
+ sub GR_N = GR_N, GR_Bias
+ mov GR_exp_2tom80 = 0x0ffaf // Exponent of 2^-80
+};;
+
+{ .mfi
+ ldfe FR_Q4 = [GR_ad_q],16 // Load Q4
+ nop.f 0
+ sub GR_minus_N = GR_Bias, GR_N // Form exponent of 2^(-N)
+};;
+
+{ .mmf
+ ldfe FR_Q3 = [GR_ad_q],16 // Load Q3
+ setf.sig FR_float_N = GR_N // Put integer N into significand
+ nop.f 0
+};;
+
+{ .mmi
+ ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
+ nop.m 0
+ extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
+};;
+
+{ .mmi
+ ldfe FR_Q1 = [GR_ad_q] // Load Q1
+ shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
+ nop.i 0
+};;
+
+{ .mmi
+ ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
+ shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2
+ nop.i 0
+};;
+
+{ .mmi
+ ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmf
+ ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2
+ setf.exp FR_2_to_minus_N = GR_minus_N // Form 2^(-N)
+ nop.f 0
+};;
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1*Z_2
+};;
+
+// WE CANNOT USE GR_X_2 IN NEXT 3 CYCLES ("DEAD" ZONE!)
+// BECAUSE OF POSSIBLE 10 CLOCKS STALL!
+// (Just nops added - nothing to do here)
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ nop.f 0
+ extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
+};;
+
+{ .mfi
+ shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3
+ fcvt.xf FR_float_N = FR_float_N // Convert integer N to float
+ nop.i 0
+};;
+
+{ .mfi
+ ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
+ fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
+ nop.i 0
+};;
+
+{ .mmf
+ nop.m 0
+ nop.m 0
+ fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
+};;
+
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2)*G_3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2)+H_3
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_r = FR_G, FR_S_hi, f1 // r = G * S_hi - 1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi=N*log2_hi+H
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N*log2_lo+h
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 // poly_lo = r * Q4 + Q3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo=poly_lo*r+Q2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r // poly_hi = Q1*rsq + r
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h//poly_lo=poly_lo*r^3+h
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fadd.s0 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo=poly_hi+poly_lo
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+ fadd.s0 FR_Res = FR_Y_lo,FR_Y_hi // Result=Y_lo+Y_hi
+ br.ret.sptk b0 // Exit for the huge arguments path
+};;
+
+
+// NEAR ONE INTERVAL
+near_1:
+{ .mfi
+ nop.m 0
+ frsqrta.s1 FR_Rcp, p0 = FR_2XM1 // Rcp = 1/sqrt(2*xm1) appr. &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_PV6 = FR_PP5, FR_XM1, FR_PP4 // pv6 = P5*xm1+P4 $POLY$
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_QV6 = FR_QQ5, FR_XM1, FR_QQ4 // qv6 = Q5*xm1+Q4 $POLY$
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_PV4 = FR_PP3, FR_XM1, FR_PP2 // pv4 = P3*xm1+P2 $POLY$
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_QV4 = FR_QQ3, FR_XM1, FR_QQ2 // qv4 = Q3*xm1+Q2 $POLY$
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_XM12 = FR_XM1, FR_XM1, f0 // xm1^2 = xm1 * xm1 $POLY$
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_PV2 = FR_PP1, FR_XM1, FR_PP0 // pv2 = P1*xm1+P0 $POLY$
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_QV2 = FR_QQ1, FR_XM1, FR_QQ0 // qv2 = Q1*xm1+Q0 $POLY$
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GG = FR_Rcp, FR_2XM1, f0 // g = Rcp * x &SQRT&
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_Half, FR_Rcp, f0 // h = 0.5 * Rcp &SQRT&
+ nop.i 0
+};;
+
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_PV3 = FR_XM12, FR_PV6, FR_PV4//pv3=pv6*xm1^2+pv4 $POLY$
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_QV3 = FR_XM12, FR_QV6, FR_QV4//qv3=qv6*xm1^2+qv4 $POLY$
+ nop.i 0
+};;
+
+
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_PP = FR_XM12, FR_PV3, FR_PV2 //pp=pv3*xm1^2+pv2 $POLY$
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_QQ = FR_XM12, FR_QV3, FR_QV2 //qq=qv3*xm1^2+qv2 $POLY$
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT&
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ frcpa.s1 FR_Y0,p0 = f1,FR_QQ // y = frcpa(b) #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g*h &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Q0 = FR_PP,FR_Y0,f0 // q = a*y #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_E0 = FR_Y0,FR_QQ,f1 // e = 1 - b*y #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GG = FR_GG, FR_EE, FR_GG // g = g * e + g &SQRT&
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_E2 = FR_E0,FR_E0,FR_E0 // e2 = e+e^2 #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_E1 = FR_E0,FR_E0,f0 // e1 = e^2 #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_EE = FR_GG, FR_HH, FR_Half // e = 0.5 - g * h &SQRT&
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y1 = FR_Y0,FR_E2,FR_Y0 // y1 = y+y*e2 #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_E3 = FR_E1,FR_E1,FR_E0 // e3 = e+e1^2 #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GG = FR_DD, FR_HH, FR_GG // g = d * h + g &SQRT&
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_HH, FR_EE, FR_HH // h = h * e + h &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y2 = FR_Y1,FR_E3,FR_Y0 // y2 = y+y1*e3 #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_R0 = FR_QQ,FR_Q0,FR_PP // r = a-b*q #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_DD = FR_GG, FR_GG, FR_2XM1 // d = x - g * g &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_E4 = FR_QQ,FR_Y2,f1 // e4 = 1-b*y2 #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_X_Hi = FR_R0,FR_Y2,FR_Q0 // x = q+r*y2 #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_GL = FR_DD, FR_HH, f0 // gl = d * h &SQRT&
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y3 = FR_Y2,FR_E4,FR_Y2 // y3 = y2+y2*e4 #DIV#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_R1 = FR_QQ,FR_X_Hi,FR_PP // r1 = a-b*x #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HH = FR_GG, FR_X_Hi, f0 // hh = gg * x_hi
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_LH = FR_GL, FR_X_Hi, f0 // lh = gl * x_hi
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3 #DIV#
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_LL = FR_GL, FR_X_lo, f0 // ll = gl*x_lo
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_HL = FR_GG, FR_X_lo, f0 // hl = gg * x_lo
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_Res = FR_GL, f1, FR_LL // res = gl - ll
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_Res = FR_Res, f1, FR_LH // res = res - lh
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_Res = FR_Res, f1, FR_HL // res = res - hl
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ fms.s1 FR_Res = FR_Res, f1, FR_HH // res = res - hh
+ nop.i 0
+};;
+
+{ .mfb
+ nop.m 0
+ fma.s0 FR_Res = FR_Res, f1, FR_GG // result = res + gg
+ br.ret.sptk b0 // Exit for near 1 path
+};;
+// NEAR ONE INTERVAL END
+
+
+
+
+acoshl_lt_pone:
+{ .mfi
+ nop.m 0
+ fmerge.s FR_Arg_X = FR_Arg, FR_Arg // Copy x for the error handler
+ nop.i 0
+};;
+{ .mfb
+ mov GR_Parameter_TAG = 135 // Error tag (4th output register)
+ frcpa.s0 FR_Res,p0 = f0,f0 // get QNaN,and raise invalid
+ br.cond.sptk __libm_error_region // exit if x < 1.0
+};;
+
+GLOBAL_LIBM_END(acoshl)
+libm_alias_ldouble_other (acosh, acosh)
+
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y = -32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp = -64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP = gp // Save gp
+};;
+
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Arg_Y,16 // Parameter 2 to stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0,GR_SAVE_B0
+ mov GR_SAVE_B0 = b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_Arg_X // Parameter 1 to stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_Res // Parameter 3 to stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0 = __libm_error_support# // Error handling function
+};;
+
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result slot address (sp+48)
+};;
+
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return res
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region#)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_acosl.S b/sysdeps/ia64/fpu/e_acosl.S
new file mode 100644
index 0000000000..4c24befed0
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_acosl.S
@@ -0,0 +1,2546 @@
+.file "acosl.s"
+
+
+// Copyright (c) 2001 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 08/28/01 New version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// long double acosl(long double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// For |s| in [2^{-4}, sqrt(2)/2]:
+// Let t= 2^k*1.b1 b2..b6 1, where s= 2^k*1.b1 b2.. b52
+// acos(s)= pi/2-asin(t)-asin(r), where r= s*sqrt(1-t^2)-t*sqrt(1-s^2), i.e.
+// r= (s-t)*sqrt(1-t^2)-t*sqrt(1-t^2)*(sqrt((1-s^2)/(1-t^2))-1)
+// asin(r)-r evaluated as 9-degree polynomial (c3*r^3+c5*r^5+c7*r^7+c9*r^9)
+// The 64-bit significands of sqrt(1-t^2), 1/(1-t^2) are read from the table,
+// along with the high and low parts of asin(t) (stored as two double precision
+// values)
+//
+// |s| in (sqrt(2)/2, sqrt(255/256)):
+// Let t= 2^k*1.b1 b2..b6 1, where (1-s^2)*frsqrta(1-s^2)= 2^k*1.b1 b2..b6..
+// acos(|s|)= asin(t)-asin(r)
+// acos(-|s|)=pi-asin(t)+asin(r), r= s*t-sqrt(1-s^2)*sqrt(1-t^2)
+// To minimize accumulated errors, r is computed as
+// r= (t*s)_s-t^2*y*z+z*y*(t^2-1+s^2)_s+z*y*(1-s^2)_s*x+z'*y*(1-s^2)*PS29+
+// +(t*s-(t*s)_s)+z*y*((t^2-1-(t^2-1+s^2)_s)+s^2)+z*y*(1-s^2-(1-s^2)_s)+
+// +ez*z'*y*(1-s^2)*(1-x),
+// where y= frsqrta(1-s^2), z= (sqrt(1-t^2))_s (rounded to 24 significant bits)
+// z'= sqrt(1-t^2), x= ((1-s^2)*y^2-1)/2
+//
+// |s|<2^{-4}: evaluate asin(s) as 17-degree polynomial, return pi/2-asin(s)
+// (or simply return pi/2-s, if|s|<2^{-64})
+//
+// |s| in [sqrt(255/256), 1): acos(|s|)= asin(sqrt(1-s^2))
+// acos(-|s|)= pi-asin(sqrt(1-s^2))
+// use 17-degree polynomial for asin(sqrt(1-s^2)),
+// 9-degree polynomial to evaluate sqrt(1-s^2)
+// High order term is (pi)_high-(y*(1-s^2))_high, for s<0,
+// or y*(1-s^2)_s, for s>0
+//
+
+
+
+// Registers used
+//==============================================================
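+// f6-f15, f32-f117 (see the F_* aliases below; the f111-f112 aliases are commented out)
+// r2-r3, r15, r20-r29 (see the R_* aliases below)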
+// p6, p7, p8, p12
+//
+
+// Register aliases for the error-handling path (NOTE(review): consumer is not visible in this chunk - confirm)
+ GR_SAVE_B0= r33 // presumably holds the saved b0 (return address)
+ GR_SAVE_PFS= r34 // presumably holds the saved ar.pfs
+ GR_SAVE_GP= r35 // This reg. can safely be used
+ GR_SAVE_SP= r36
+
+ GR_Parameter_X= r37 // presumably address of argument 1 for the error routine
+ GR_Parameter_Y= r38 // presumably address of argument 2 for the error routine
+ GR_Parameter_RESULT= r39 // presumably address of the result slot
+ GR_Parameter_TAG= r40 // presumably the error tag
+
+ FR_X= f10
+ FR_Y= f1 // f1 is used as the constant 1 by the arithmetic in this file (e.g. 1-s^2)
+ FR_RESULT= f8 // f8 is also the register acosl reads its argument s from
+
+
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(T_table)
+
+// stores 64-bit significand of 1/(1-t^2), 64-bit significand of sqrt(1-t^2),
+// asin(t)_high (double precision), asin(t)_low (double precision)
+
+data8 0x80828692b71c4391, 0xff7ddcec2d87e879
+data8 0x3fb022bc0ae531a0, 0x3c9f599c7bb42af6
+data8 0x80869f0163d0b082, 0xff79cad2247914d3
+data8 0x3fb062dd26afc320, 0x3ca4eff21bd49c5c
+data8 0x808ac7d5a8690705, 0xff75a89ed6b626b9
+data8 0x3fb0a2ff4a1821e0, 0x3cb7e33b58f164cc
+data8 0x808f0112ad8ad2e0, 0xff7176517c2cc0cb
+data8 0x3fb0e32279319d80, 0x3caee31546582c43
+data8 0x80934abba8a1da0a, 0xff6d33e949b1ed31
+data8 0x3fb12346b8101da0, 0x3cb8bfe463d087cd
+data8 0x8097a4d3dbe63d8f, 0xff68e16571015c63
+data8 0x3fb1636c0ac824e0, 0x3c8870a7c5a3556f
+data8 0x809c0f5e9662b3dd, 0xff647ec520bca0f0
+data8 0x3fb1a392756ed280, 0x3c964f1a927461ae
+data8 0x80a08a5f33fadc66, 0xff600c07846a6830
+data8 0x3fb1e3b9fc19e580, 0x3c69eb3576d56332
+data8 0x80a515d91d71acd4, 0xff5b892bc475affa
+data8 0x3fb223e2a2dfbe80, 0x3c6a4e19fd972fb6
+data8 0x80a9b1cfc86ff7cd, 0xff56f631062cf93d
+data8 0x3fb2640c6dd76260, 0x3c62041160e0849e
+data8 0x80ae5e46b78b0d68, 0xff5253166bc17794
+data8 0x3fb2a43761187c80, 0x3cac61651af678c0
+data8 0x80b31b417a4b756b, 0xff4d9fdb14463dc8
+data8 0x3fb2e46380bb6160, 0x3cb06ef23eeba7a1
+data8 0x80b7e8c3ad33c369, 0xff48dc7e1baf6738
+data8 0x3fb32490d0d910c0, 0x3caa05f480b300d5
+data8 0x80bcc6d0f9c784d6, 0xff4408fe9ad13e37
+data8 0x3fb364bf558b3820, 0x3cb01e7e403aaab9
+data8 0x80c1b56d1692492d, 0xff3f255ba75f5f4e
+data8 0x3fb3a4ef12ec3540, 0x3cb4fe8fcdf5f5f1
+data8 0x80c6b49bc72ec446, 0xff3a319453ebd961
+data8 0x3fb3e5200d171880, 0x3caf2dc089b2b7e2
+data8 0x80cbc460dc4e0ae8, 0xff352da7afe64ac6
+data8 0x3fb425524827a720, 0x3cb75a855e7c6053
+data8 0x80d0e4c033bee9c4, 0xff301994c79afb32
+data8 0x3fb46585c83a5e00, 0x3cb3264981c019ab
+data8 0x80d615bdb87556db, 0xff2af55aa431f291
+data8 0x3fb4a5ba916c73c0, 0x3c994251d94427b5
+data8 0x80db575d6291fd8a, 0xff25c0f84bae0cb9
+data8 0x3fb4e5f0a7dbdb20, 0x3cbee2fcc4c786cb
+data8 0x80e0a9a33769e535, 0xff207c6cc0ec09fd
+data8 0x3fb526280fa74620, 0x3c940656e5549b91
+data8 0x80e60c93498e32cd, 0xff1b27b703a19c98
+data8 0x3fb56660ccee2740, 0x3ca7082374d7b2cd
+data8 0x80eb8031b8d4052d, 0xff15c2d6105c72f8
+data8 0x3fb5a69ae3d0b520, 0x3c7c4d46e09ac68a
+data8 0x80f10482b25c6c8a, 0xff104dc8e0813ed4
+data8 0x3fb5e6d6586fec20, 0x3c9aa84ffd9b4958
+data8 0x80f6998a709c7cfb, 0xff0ac88e6a4ab926
+data8 0x3fb627132eed9140, 0x3cbced2cbbbe7d16
+data8 0x80fc3f4d3b657c44, 0xff053325a0c8a2ec
+data8 0x3fb667516b6c34c0, 0x3c6489c5fc68595a
+data8 0x8101f5cf67ed2af8, 0xfeff8d8d73dec2bb
+data8 0x3fb6a791120f33a0, 0x3cbe12acf159dfad
+data8 0x8107bd1558d6291f, 0xfef9d7c4d043df29
+data8 0x3fb6e7d226fabba0, 0x3ca386d099cd0dc7
+data8 0x810d95237e38766a, 0xfef411ca9f80b5f7
+data8 0x3fb72814ae53cc20, 0x3cb9f35731e71dd6
+data8 0x81137dfe55aa0e29, 0xfeee3b9dc7eef009
+data8 0x3fb76858ac403a00, 0x3c74df3dd959141a
+data8 0x811977aa6a479f0f, 0xfee8553d2cb8122c
+data8 0x3fb7a89e24e6b0e0, 0x3ca6034406ee42bc
+data8 0x811f822c54bd5ef8, 0xfee25ea7add46a91
+data8 0x3fb7e8e51c6eb6a0, 0x3cb82f8f78e68ed7
+data8 0x81259d88bb4ffac1, 0xfedc57dc2809fb1d
+data8 0x3fb8292d9700ad60, 0x3cbebb73c0e653f9
+data8 0x812bc9c451e5a257, 0xfed640d974eb6068
+data8 0x3fb8697798c5d620, 0x3ca2feee76a9701b
+data8 0x813206e3da0f3124, 0xfed0199e6ad6b585
+data8 0x3fb8a9c325e852e0, 0x3cb9e88f2f4d0efe
+data8 0x813854ec231172f9, 0xfec9e229dcf4747d
+data8 0x3fb8ea1042932a00, 0x3ca5ff40d81f66fd
+data8 0x813eb3e209ee858f, 0xfec39a7a9b36538b
+data8 0x3fb92a5ef2f247c0, 0x3cb5e3bece4d6b07
+data8 0x814523ca796f56ce, 0xfebd428f72561efe
+data8 0x3fb96aaf3b3281a0, 0x3cb7b9e499436d7c
+data8 0x814ba4aa6a2d3ff9, 0xfeb6da672bd48fe4
+data8 0x3fb9ab011f819860, 0x3cb9168143cc1a7f
+data8 0x81523686e29bbdd7, 0xfeb062008df81f50
+data8 0x3fb9eb54a40e3ac0, 0x3cb6e544197eb1e1
+data8 0x8158d964f7124614, 0xfea9d95a5bcbd65a
+data8 0x3fba2ba9cd080800, 0x3ca9a717be8f7446
+data8 0x815f8d49c9d639e4, 0xfea34073551e1ac8
+data8 0x3fba6c009e9f9260, 0x3c741e989a60938a
+data8 0x8166523a8b24f626, 0xfe9c974a367f785c
+data8 0x3fbaac591d0661a0, 0x3cb2c1290107e57d
+data8 0x816d283c793e0114, 0xfe95ddddb94166cb
+data8 0x3fbaecb34c6ef600, 0x3c9c7d5fbaec405d
+data8 0x81740f54e06d55bd, 0xfe8f142c93750c50
+data8 0x3fbb2d0f310cca00, 0x3cbc09479a9cbcfb
+data8 0x817b07891b15cd5e, 0xfe883a3577e9fceb
+data8 0x3fbb6d6ccf1455e0, 0x3cb9450bff4ee307
+data8 0x818210de91bba6c8, 0xfe814ff7162cf62f
+data8 0x3fbbadcc2abb1180, 0x3c9227fda12a8d24
+data8 0x81892b5abb0f2bf9, 0xfe7a55701a8697b1
+data8 0x3fbbee2d48377700, 0x3cb6fad72acfe356
+data8 0x819057031bf7760e, 0xfe734a9f2dfa1810
+data8 0x3fbc2e902bc10600, 0x3cb4465b588d16ad
+data8 0x819793dd479d4fbe, 0xfe6c2f82f643f68b
+data8 0x3fbc6ef4d9904580, 0x3c8b9ac54823960d
+data8 0x819ee1eedf76367a, 0xfe65041a15d8a92c
+data8 0x3fbcaf5b55dec6a0, 0x3ca2b8d28a954db2
+data8 0x81a6413d934f7a66, 0xfe5dc8632be3477f
+data8 0x3fbcefc3a4e727a0, 0x3c9380da83713ab4
+data8 0x81adb1cf21597d4b, 0xfe567c5cd44431d5
+data8 0x3fbd302dcae51600, 0x3ca995b83421756a
+data8 0x81b533a9563310b8, 0xfe4f2005a78fb50f
+data8 0x3fbd7099cc155180, 0x3caefa2f7a817d5f
+data8 0x81bcc6d20cf4f373, 0xfe47b35c3b0caaeb
+data8 0x3fbdb107acb5ae80, 0x3cb455fc372dd026
+data8 0x81c46b4f2f3d6e68, 0xfe40365f20b316d6
+data8 0x3fbdf177710518c0, 0x3cbee3dcc5b01434
+data8 0x81cc2126b53c1144, 0xfe38a90ce72abf36
+data8 0x3fbe31e91d439620, 0x3cb3e131c950aebd
+data8 0x81d3e85ea5bd8ee2, 0xfe310b6419c9c33a
+data8 0x3fbe725cb5b24900, 0x3c01d3fac6029027
+data8 0x81dbc0fd1637b9c1, 0xfe295d6340932d15
+data8 0x3fbeb2d23e937300, 0x3c6304cc44aeedd1
+data8 0x81e3ab082ad5a0a4, 0xfe219f08e03580b3
+data8 0x3fbef349bc2a77e0, 0x3cac1d2d6abe9c72
+data8 0x81eba6861683cb97, 0xfe19d0537a0946e2
+data8 0x3fbf33c332bbe020, 0x3ca0909dba4e96ca
+data8 0x81f3b37d1afc9979, 0xfe11f1418c0f94e2
+data8 0x3fbf743ea68d5b60, 0x3c937fc12a2a779a
+data8 0x81fbd1f388d4be45, 0xfe0a01d190f09063
+data8 0x3fbfb4bc1be5c340, 0x3cbf51a504b55813
+data8 0x820401efbf87e248, 0xfe020201fff9efea
+data8 0x3fbff53b970d1e80, 0x3ca625444b260078
+data8 0x82106ad2ffdca049, 0xfdf5e3940a49135e
+data8 0x3fc02aff52065460, 0x3c9125d113e22a57
+data8 0x8221343d6ea1d3e2, 0xfde581a45429b0a0
+data8 0x3fc06b84f8e03220, 0x3caccf362295894b
+data8 0x82324434adbf99c2, 0xfdd4de1a001fb775
+data8 0x3fc0ac0ed1fe7240, 0x3cc22f676096b0af
+data8 0x82439aee8d0c7747, 0xfdc3f8e8269d1f03
+data8 0x3fc0ec9cee9e4820, 0x3cca147e2886a628
+data8 0x825538a1d0fcb2f0, 0xfdb2d201a9b1ba66
+data8 0x3fc12d2f6006f0a0, 0x3cc72b36633bc2d4
+data8 0x82671d86345c5cee, 0xfda1695934d723e7
+data8 0x3fc16dc63789de60, 0x3cb11f9c47c7b83f
+data8 0x827949d46a121770, 0xfd8fbee13cbbb823
+data8 0x3fc1ae618682e620, 0x3cce1b59020cef8e
+data8 0x828bbdc61eeab9ba, 0xfd7dd28bff0c9f34
+data8 0x3fc1ef015e586c40, 0x3cafec043e0225ee
+data8 0x829e7995fb6de9e1, 0xfd6ba44b823ee1ca
+data8 0x3fc22fa5d07b90c0, 0x3cba905409caf8e3
+data8 0x82b17d7fa5bbc982, 0xfd5934119557883a
+data8 0x3fc2704eee685da0, 0x3cb5ef21838a823e
+data8 0x82c4c9bfc373d276, 0xfd4681cfcfb2c161
+data8 0x3fc2b0fcc9a5f3e0, 0x3ccc7952c5e0e312
+data8 0x82d85e93fba50136, 0xfd338d7790ca0f41
+data8 0x3fc2f1af73c6ba00, 0x3cbecf5f977d1ca9
+data8 0x82ec3c3af8c76b32, 0xfd2056f9fff97727
+data8 0x3fc33266fe6889a0, 0x3c9d329c022ebdb5
+data8 0x830062f46abf6022, 0xfd0cde480c43b327
+data8 0x3fc373237b34de60, 0x3cc95806d4928adb
+data8 0x8314d30108ea35f0, 0xfcf923526c1562b2
+data8 0x3fc3b3e4fbe10520, 0x3cbc299fe7223d54
+data8 0x83298ca29434df97, 0xfce526099d0737ed
+data8 0x3fc3f4ab922e4a60, 0x3cb59d8bb8fdbccc
+data8 0x833e901bd93c7009, 0xfcd0e65de39f1f7c
+data8 0x3fc435774fea2a60, 0x3c9ec18b43340914
+data8 0x8353ddb0b278aad8, 0xfcbc643f4b106055
+data8 0x3fc4764846ee80a0, 0x3cb90402efd87ed6
+data8 0x836975a60a70c52e, 0xfca79f9da4fab13a
+data8 0x3fc4b71e8921b860, 0xbc58f23449ed6365
+data8 0x837f5841ddfa7a46, 0xfc92986889284148
+data8 0x3fc4f7fa2876fca0, 0xbc6294812bf43acd
+data8 0x839585cb3e839773, 0xfc7d4e8f554ab12f
+data8 0x3fc538db36ee6960, 0x3cb910b773d4c578
+data8 0x83abfe8a5466246f, 0xfc67c2012cb6fa68
+data8 0x3fc579c1c6953cc0, 0x3cc5ede909fc47fc
+data8 0x83c2c2c861474d91, 0xfc51f2acf82041d5
+data8 0x3fc5baade9860880, 0x3cac63cdfc3588e5
+data8 0x83d9d2cfc2813637, 0xfc3be08165519325
+data8 0x3fc5fb9fb1e8e3a0, 0x3cbf7c8466578c29
+data8 0x83f12eebf397daac, 0xfc258b6ce6e6822f
+data8 0x3fc63c9731f39d40, 0x3cb6d2a7ffca3e9e
+data8 0x8408d76990b9296e, 0xfc0ef35db402af94
+data8 0x3fc67d947be9eec0, 0x3cb1980da09e6566
+data8 0x8420cc9659487cd7, 0xfbf81841c8082dc4
+data8 0x3fc6be97a21daf00, 0x3cc2ac8330e59aa5
+data8 0x84390ec132759ecb, 0xfbe0fa06e24cc390
+data8 0x3fc6ffa0b6ef05e0, 0x3ccc1a030fee56c4
+data8 0x84519e3a29df811a, 0xfbc9989a85ce0954
+data8 0x3fc740afcccca000, 0x3cc19692a5301ca6
+data8 0x846a7b527842d61b, 0xfbb1f3e9f8e45dc4
+data8 0x3fc781c4f633e2c0, 0x3cc0e98f3868a508
+data8 0x8483a65c8434b5f0, 0xfb9a0be244f4af45
+data8 0x3fc7c2e045b12140, 0x3cb2a8d309754420
+data8 0x849d1fabe4e97dd7, 0xfb81e070362116d1
+data8 0x3fc80401cddfd120, 0x3ca7a44544aa4ce6
+data8 0x84b6e795650817ea, 0xfb6971805af8411e
+data8 0x3fc84529a16ac020, 0x3c9e3b709c7d6f94
+data8 0x84d0fe6f0589da92, 0xfb50beff0423a2f5
+data8 0x3fc88657d30c49e0, 0x3cc60d65a7f0a278
+data8 0x84eb649000a73014, 0xfb37c8d84414755c
+data8 0x3fc8c78c758e8e80, 0x3cc94b2ee984c2b7
+data8 0x85061a50ccd13781, 0xfb1e8ef7eeaf764b
+data8 0x3fc908c79bcba900, 0x3cc8540ae794a2fe
+data8 0x8521200b1fb8916e, 0xfb05114998f76a83
+data8 0x3fc94a0958ade6c0, 0x3ca127f49839fa9c
+data8 0x853c7619f1618bf6, 0xfaeb4fb898b65d19
+data8 0x3fc98b51bf2ffee0, 0x3c8c9ba7a803909a
+data8 0x85581cd97f45e274, 0xfad14a3004259931
+data8 0x3fc9cca0e25d4ac0, 0x3cba458e91d3bf54
+data8 0x857414a74f8446b4, 0xfab7009ab1945a54
+data8 0x3fca0df6d551fe80, 0x3cc78ea1d329d2b2
+data8 0x85905de2341dea46, 0xfa9c72e3370d2fbc
+data8 0x3fca4f53ab3b6200, 0x3ccf60dca86d57ef
+data8 0x85acf8ea4e423ff8, 0xfa81a0f3e9fa0ee9
+data8 0x3fca90b777580aa0, 0x3ca4c4e2ec8a867e
+data8 0x85c9e62111a92e7d, 0xfa668ab6dec711b1
+data8 0x3fcad2224cf814e0, 0x3c303de5980d071c
+data8 0x85e725e947fbee97, 0xfa4b3015e883dbfe
+data8 0x3fcb13943f7d5f80, 0x3cc29d4eefa5cb1e
+data8 0x8604b8a7144cd054, 0xfa2f90fa9883a543
+data8 0x3fcb550d625bc6a0, 0x3c9e01a746152daf
+data8 0x86229ebff69e2415, 0xfa13ad4e3dfbe1c1
+data8 0x3fcb968dc9195ea0, 0x3ccc091bd73ae518
+data8 0x8640d89acf78858c, 0xf9f784f9e5a1877b
+data8 0x3fcbd815874eb160, 0x3cb5f4b89875e187
+data8 0x865f669fe390c7f5, 0xf9db17e65944eacf
+data8 0x3fcc19a4b0a6f9c0, 0x3cc5c0bc2b0bbf14
+data8 0x867e4938df7dc45f, 0xf9be65fc1f6c2e6e
+data8 0x3fcc5b3b58e061e0, 0x3cc1ca70df8f57e7
+data8 0x869d80d0db7e4c0c, 0xf9a16f237aec427a
+data8 0x3fcc9cd993cc4040, 0x3cbae93acc85eccf
+data8 0x86bd0dd45f4f8265, 0xf98433446a806e70
+data8 0x3fccde7f754f5660, 0x3cb22f70e64568d0
+data8 0x86dcf0b16613e37a, 0xf966b246a8606170
+data8 0x3fcd202d11620fa0, 0x3c962030e5d4c849
+data8 0x86fd29d7624b3d5d, 0xf948ec11a9d4c45b
+data8 0x3fcd61e27c10c0a0, 0x3cc7083c91d59217
+data8 0x871db9b741dbe44a, 0xf92ae08c9eca4941
+data8 0x3fcda39fc97be7c0, 0x3cc9258579e57211
+data8 0x873ea0c3722d6af2, 0xf90c8f9e71633363
+data8 0x3fcde5650dd86d60, 0x3ca4755a9ea582a9
+data8 0x875fdf6fe45529e8, 0xf8edf92dc5875319
+data8 0x3fce27325d6fe520, 0x3cbc1e2b6c1954f9
+data8 0x878176321154e2bc, 0xf8cf1d20f87270b8
+data8 0x3fce6907cca0d060, 0x3cb6ca4804750830
+data8 0x87a36580fe6bccf5, 0xf8affb5e20412199
+data8 0x3fceaae56fdee040, 0x3cad6b310d6fd46c
+data8 0x87c5add5417a5cb9, 0xf89093cb0b7c0233
+data8 0x3fceeccb5bb33900, 0x3cc16e99cedadb20
+data8 0x87e84fa9057914ca, 0xf870e64d40a15036
+data8 0x3fcf2eb9a4bcb600, 0x3cc75ee47c8b09e9
+data8 0x880b4b780f02b709, 0xf850f2c9fdacdf78
+data8 0x3fcf70b05fb02e20, 0x3cad6350d379f41a
+data8 0x882ea1bfc0f228ac, 0xf830b926379e6465
+data8 0x3fcfb2afa158b8a0, 0x3cce0ccd9f829985
+data8 0x885252ff21146108, 0xf810394699fe0e8e
+data8 0x3fcff4b77e97f3e0, 0x3c9b30faa7a4c703
+data8 0x88765fb6dceebbb3, 0xf7ef730f865f6df0
+data8 0x3fd01b6406332540, 0x3cdc5772c9e0b9bd
+data8 0x88ad1f69be2cc730, 0xf7bdc59bc9cfbd97
+data8 0x3fd04cf8ad203480, 0x3caeef44fe21a74a
+data8 0x88f763f70ae2245e, 0xf77a91c868a9c54e
+data8 0x3fd08f23ce0162a0, 0x3cd6290ab3fe5889
+data8 0x89431fc7bc0c2910, 0xf73642973c91298e
+data8 0x3fd0d1610f0c1ec0, 0x3cc67401a01f08cf
+data8 0x8990573407c7738e, 0xf6f0d71d1d7a2dd6
+data8 0x3fd113b0c65d88c0, 0x3cc7aa4020fe546f
+data8 0x89df0eb108594653, 0xf6aa4e6a05cfdef2
+data8 0x3fd156134ada6fe0, 0x3cc87369da09600c
+data8 0x8a2f4ad16e0ed78a, 0xf662a78900c35249
+data8 0x3fd19888f43427a0, 0x3cc62b220f38e49c
+data8 0x8a811046373e0819, 0xf619e180181d97cc
+data8 0x3fd1db121aed7720, 0x3ca3ede7490b52f4
+data8 0x8ad463df6ea0fa2c, 0xf5cffb504190f9a2
+data8 0x3fd21daf185fa360, 0x3caafad98c1d6c1b
+data8 0x8b294a8cf0488daf, 0xf584f3f54b8604e6
+data8 0x3fd2606046bf95a0, 0x3cdb2d704eeb08fa
+data8 0x8b7fc95f35647757, 0xf538ca65c960b582
+data8 0x3fd2a32601231ec0, 0x3cc661619fa2f126
+data8 0x8bd7e588272276f8, 0xf4eb7d92ff39fccb
+data8 0x3fd2e600a3865760, 0x3c8a2a36a99aca4a
+data8 0x8c31a45bf8e9255e, 0xf49d0c68cd09b689
+data8 0x3fd328f08ad12000, 0x3cb9efaf1d7ab552
+data8 0x8c8d0b520a35eb18, 0xf44d75cd993cfad2
+data8 0x3fd36bf614dcc040, 0x3ccacbb590bef70d
+data8 0x8cea2005d068f23d, 0xf3fcb8a23ab4942b
+data8 0x3fd3af11a079a6c0, 0x3cd9775872cf037d
+data8 0x8d48e837c8cd5027, 0xf3aad3c1e2273908
+data8 0x3fd3f2438d754b40, 0x3ca03304f667109a
+data8 0x8da969ce732f3ac7, 0xf357c60202e2fd7e
+data8 0x3fd4358c3ca032e0, 0x3caecf2504ff1a9d
+data8 0x8e0baad75555e361, 0xf3038e323ae9463a
+data8 0x3fd478ec0fd419c0, 0x3cc64bdc3d703971
+data8 0x8e6fb18807ba877e, 0xf2ae2b1c3a6057f7
+data8 0x3fd4bc6369fa40e0, 0x3cbb7122ec245cf2
+data8 0x8ed5843f4bda74d5, 0xf2579b83aa556f0c
+data8 0x3fd4fff2af11e2c0, 0x3c9cfa2dc792d394
+data8 0x8f3d29862c861fef, 0xf1ffde2612ca1909
+data8 0x3fd5439a4436d000, 0x3cc38d46d310526b
+data8 0x8fa6a81128940b2d, 0xf1a6f1bac0075669
+data8 0x3fd5875a8fa83520, 0x3cd8bf59b8153f8a
+data8 0x901206c1686317a6, 0xf14cd4f2a730d480
+data8 0x3fd5cb33f8cf8ac0, 0x3c9502b5c4d0e431
+data8 0x907f4ca5fe9cf739, 0xf0f186784a125726
+data8 0x3fd60f26e847b120, 0x3cc8a1a5e0acaa33
+data8 0x90ee80fd34aeda5e, 0xf09504ef9a212f18
+data8 0x3fd65333c7e43aa0, 0x3cae5b029cb1f26e
+data8 0x915fab35e37421c6, 0xf0374ef5daab5c45
+data8 0x3fd6975b02b8e360, 0x3cd5aa1c280c45e6
+data8 0x91d2d2f0d894d73c, 0xefd86321822dbb51
+data8 0x3fd6db9d05213b20, 0x3cbecf2c093ccd8b
+data8 0x9248000249200009, 0xef7840021aca5a72
+data8 0x3fd71ffa3cc87fc0, 0x3cb8d273f08d00d9
+data8 0x92bf3a7351f081d2, 0xef16e42021d7cbd5
+data8 0x3fd7647318b1ad20, 0x3cbce099d79cdc46
+data8 0x93388a8386725713, 0xeeb44dfce6820283
+data8 0x3fd7a908093fc1e0, 0x3ccb033ec17a30d9
+data8 0x93b3f8aa8e653812, 0xee507c126774fa45
+data8 0x3fd7edb9803e3c20, 0x3cc10aedb48671eb
+data8 0x94318d99d341ade4, 0xedeb6cd32f891afb
+data8 0x3fd83287f0e9cf80, 0x3c994c0c1505cd2a
+data8 0x94b1523e3dedc630, 0xed851eaa3168f43c
+data8 0x3fd87773cff956e0, 0x3cda3b7bce6a6b16
+data8 0x95334fc20577563f, 0xed1d8ffaa2279669
+data8 0x3fd8bc7d93a70440, 0x3cd4922edc792ce2
+data8 0x95b78f8e8f92f274, 0xecb4bf1fd2be72da
+data8 0x3fd901a5b3b9cf40, 0x3cd3fea1b00f9d0d
+data8 0x963e1b4e63a87c3f, 0xec4aaa6d08694cc1
+data8 0x3fd946eca98f2700, 0x3cdba4032d968ff1
+data8 0x96c6fcef314074fc, 0xebdf502d53d65fea
+data8 0x3fd98c52f024e800, 0x3cbe7be1ab8c95c9
+data8 0x97523ea3eab028b2, 0xeb72aea36720793e
+data8 0x3fd9d1d904239860, 0x3cd72d08a6a22b70
+data8 0x97dfeae6f4ee4a9a, 0xeb04c4096a884e94
+data8 0x3fda177f63e8ef00, 0x3cd818c3c1ebfac7
+data8 0x98700c7c6d85d119, 0xea958e90cfe1efd7
+data8 0x3fda5d468f92a540, 0x3cdf45fbfaa080fe
+data8 0x9902ae7487a9caa1, 0xea250c6224aab21a
+data8 0x3fdaa32f090998e0, 0x3cd715a9353cede4
+data8 0x9997dc2e017a9550, 0xe9b33b9ce2bb7638
+data8 0x3fdae939540d3f00, 0x3cc545c014943439
+data8 0x9a2fa158b29b649b, 0xe9401a573f8aa706
+data8 0x3fdb2f65f63f6c60, 0x3cd4a63c2f2ca8e2
+data8 0x9aca09f835466186, 0xe8cba69df9f0bf35
+data8 0x3fdb75b5773075e0, 0x3cda310ce1b217ec
+data8 0x9b672266ab1e0136, 0xe855de74266193d4
+data8 0x3fdbbc28606babc0, 0x3cdc84b75cca6c44
+data8 0x9c06f7579f0b7bd5, 0xe7debfd2f98c060b
+data8 0x3fdc02bf3d843420, 0x3cd225d967ffb922
+data8 0x9ca995db058cabdc, 0xe76648a991511c6e
+data8 0x3fdc497a9c224780, 0x3cde08101c5b825b
+data8 0x9d4f0b605ce71e88, 0xe6ec76dcbc02d9a7
+data8 0x3fdc905b0c10d420, 0x3cb1abbaa3edf120
+data8 0x9df765b9eecad5e6, 0xe6714846bdda7318
+data8 0x3fdcd7611f4b8a00, 0x3cbf6217ae80aadf
+data8 0x9ea2b320350540fe, 0xe5f4bab71494cd6b
+data8 0x3fdd1e8d6a0d56c0, 0x3cb726e048cc235c
+data8 0x9f51023562fc5676, 0xe576cbf239235ecb
+data8 0x3fdd65e082df5260, 0x3cd9e66872bd5250
+data8 0xa002620915c2a2f6, 0xe4f779b15f5ec5a7
+data8 0x3fddad5b02a82420, 0x3c89743b0b57534b
+data8 0xa0b6e21c2caf9992, 0xe476c1a233a7873e
+data8 0x3fddf4fd84bbe160, 0x3cbf7adea9ee3338
+data8 0xa16e9264cc83a6b2, 0xe3f4a16696608191
+data8 0x3fde3cc8a6ec6ee0, 0x3cce46f5a51f49c6
+data8 0xa22983528f3d8d49, 0xe3711694552da8a8
+data8 0x3fde84bd099a6600, 0x3cdc78f6490a2d31
+data8 0xa2e7c5d2e2e69460, 0xe2ec1eb4e1e0a5fb
+data8 0x3fdeccdb4fc685c0, 0x3cdd3aedb56a4825
+data8 0xa3a96b5599bd2532, 0xe265b74506fbe1c9
+data8 0x3fdf15241f23b3e0, 0x3cd440f3c6d65f65
+data8 0xa46e85d1ae49d7de, 0xe1ddddb499b3606f
+data8 0x3fdf5d98202994a0, 0x3cd6c44bd3fb745a
+data8 0xa53727ca3e11b99e, 0xe1548f662951b00d
+data8 0x3fdfa637fe27bf60, 0x3ca8ad1cd33054dd
+data8 0xa6036453bdc20186, 0xe0c9c9aeabe5e481
+data8 0x3fdfef0467599580, 0x3cc0f1ac0685d78a
+data8 0xa6d34f1969dda338, 0xe03d89d5281e4f81
+data8 0x3fe01bff067d6220, 0x3cc0731e8a9ef057
+data8 0xa7a6fc62f7246ff3, 0xdfafcd125c323f54
+data8 0x3fe04092d1ae3b40, 0x3ccabda24b59906d
+data8 0xa87e811a861df9b9, 0xdf20909061bb9760
+data8 0x3fe0653df0fd9fc0, 0x3ce94c8dcc722278
+data8 0xa959f2d2dd687200, 0xde8fd16a4e5f88bd
+data8 0x3fe08a00c1cae320, 0x3ce6b888bb60a274
+data8 0xaa3967cdeea58bda, 0xddfd8cabd1240d22
+data8 0x3fe0aedba3221c00, 0x3ced5941cd486e46
+data8 0xab904fd587263c84, 0xdd1f4472e1cf64ed
+data8 0x3fe0e651e85229c0, 0x3cdb6701042299b1
+data8 0xad686d44dd5a74bb, 0xdbf173e1f6b46e92
+data8 0x3fe1309cbf4cdb20, 0x3cbf1be7bb3f0ec5
+data8 0xaf524e15640ebee4, 0xdabd54896f1029f6
+data8 0x3fe17b4ee1641300, 0x3ce81dd055b792f1
+data8 0xb14eca24ef7db3fa, 0xd982cb9ae2f47e41
+data8 0x3fe1c66b9ffd6660, 0x3cd98ea31eb5ddc7
+data8 0xb35ec807669920ce, 0xd841bd1b8291d0b6
+data8 0x3fe211f66db3a5a0, 0x3ca480c35a27b4a2
+data8 0xb5833e4755e04dd1, 0xd6fa0bd3150b6930
+data8 0x3fe25df2e05b6c40, 0x3ca4bc324287a351
+data8 0xb7bd34c8000b7bd3, 0xd5ab9939a7d23aa1
+data8 0x3fe2aa64b32f7780, 0x3cba67314933077c
+data8 0xba0dc64d126cc135, 0xd4564563ce924481
+data8 0x3fe2f74fc9289ac0, 0x3cec1a1dc0efc5ec
+data8 0xbc76222cbbfa74a6, 0xd2f9eeed501125a8
+data8 0x3fe344b82f859ac0, 0x3ceeef218de413ac
+data8 0xbef78e31985291a9, 0xd19672e2182f78be
+data8 0x3fe392a22087b7e0, 0x3cd2619ba201204c
+data8 0xc19368b2b0629572, 0xd02baca5427e436a
+data8 0x3fe3e11206694520, 0x3cb5d0b3143fe689
+data8 0xc44b2ae8c6733e51, 0xceb975d60b6eae5d
+data8 0x3fe4300c7e945020, 0x3cbd367143da6582
+data8 0xc7206b894212dfef, 0xcd3fa6326ff0ac9a
+data8 0x3fe47f965d201d60, 0x3ce797c7a4ec1d63
+data8 0xca14e1b0622de526, 0xcbbe13773c3c5338
+data8 0x3fe4cfb4b09d1a20, 0x3cedfadb5347143c
+data8 0xcd2a6825eae65f82, 0xca34913d425a5ae9
+data8 0x3fe5206cc637e000, 0x3ce2798b38e54193
+data8 0xd06301095e1351ee, 0xc8a2f0d3679c08c0
+data8 0x3fe571c42e3d0be0, 0x3ccd7cb9c6c2ca68
+data8 0xd3c0d9f50057adda, 0xc70901152d59d16b
+data8 0x3fe5c3c0c108f940, 0x3ceb6c13563180ab
+data8 0xd74650a98cc14789, 0xc5668e3d4cbf8828
+data8 0x3fe61668a46ffa80, 0x3caa9092e9e3c0e5
+data8 0xdaf5f8579dcc8f8f, 0xc3bb61b3eed42d02
+data8 0x3fe669c251ad69e0, 0x3cccf896ef3b4fee
+data8 0xded29f9f9a6171b4, 0xc20741d7f8e8e8af
+data8 0x3fe6bdd49bea05c0, 0x3cdc6b29937c575d
+data8 0xe2df5765854ccdb0, 0xc049f1c2d1b8014b
+data8 0x3fe712a6b76c6e80, 0x3ce1ddc6f2922321
+data8 0xe71f7a9b94fcb4c3, 0xbe833105ec291e91
+data8 0x3fe76840418978a0, 0x3ccda46e85432c3d
+data8 0xeb96b72d3374b91e, 0xbcb2bb61493b28b3
+data8 0x3fe7bea9496d5a40, 0x3ce37b42ec6e17d3
+data8 0xf049183c3f53c39b, 0xbad848720223d3a8
+data8 0x3fe815ea59dab0a0, 0x3cb03ad41bfc415b
+data8 0xf53b11ec7f415f15, 0xb8f38b57c53c9c48
+data8 0x3fe86e0c84010760, 0x3cc03bfcfb17fe1f
+data8 0xfa718f05adbf2c33, 0xb70432500286b185
+data8 0x3fe8c7196b9225c0, 0x3ced99fcc6866ba9
+data8 0xfff200c3f5489608, 0xb509e6454dca33cc
+data8 0x3fe9211b54441080, 0x3cb789cb53515688
+// The following table entries are not used
+//data8 0x82e138a0fac48700, 0xb3044a513a8e6132
+//data8 0x3fe97c1d30f5b7c0, 0x3ce1eb765612d1d0
+//data8 0x85f4cc7fc670d021, 0xb0f2fb2ea6cbbc88
+//data8 0x3fe9d82ab4b5fde0, 0x3ced3fe6f27e8039
+//data8 0x89377c1387d5b908, 0xaed58e9a09014d5c
+//data8 0x3fea355065f87fa0, 0x3cbef481d25f5b58
+//data8 0x8cad7a2c98dec333, 0xacab929ce114d451
+//data8 0x3fea939bb451e2a0, 0x3c8e92b4fbf4560f
+//data8 0x905b7dfc99583025, 0xaa748cc0dbbbc0ec
+//data8 0x3feaf31b11270220, 0x3cdced8c61bd7bd5
+//data8 0x9446d8191f80dd42, 0xa82ff92687235baf
+//data8 0x3feb53de0bcffc20, 0x3cbe1722fb47509e
+//data8 0x98758ba086e4000a, 0xa5dd497a9c184f58
+//data8 0x3febb5f571cb0560, 0x3ce0c7774329a613
+//data8 0x9cee6c7bf18e4e24, 0xa37be3c3cd1de51b
+//data8 0x3fec197373bc7be0, 0x3ce08ebdb55c3177
+//data8 0xa1b944000a1b9440, 0xa10b2101b4f27e03
+//data8 0x3fec7e6bd023da60, 0x3ce5fc5fd4995959
+//data8 0xa6defd8ba04d3e38, 0x9e8a4b93cad088ec
+//data8 0x3fece4f404e29b20, 0x3cea3413401132b5
+//data8 0xac69dd408a10c62d, 0x9bf89d5d17ddae8c
+//data8 0x3fed4d2388f63600, 0x3cd5a7fb0d1d4276
+//data8 0xb265c39cbd80f97a, 0x99553d969fec7beb
+//data8 0x3fedb714101e0a00, 0x3cdbda21f01193f2
+//data8 0xb8e081a16ae4ae73, 0x969f3e3ed2a0516c
+//data8 0x3fee22e1da97bb00, 0x3ce7231177f85f71
+//data8 0xbfea427678945732, 0x93d5990f9ee787af
+//data8 0x3fee90ac13b18220, 0x3ce3c8a5453363a5
+//data8 0xc79611399b8c90c5, 0x90f72bde80febc31
+//data8 0x3fef009542b712e0, 0x3ce218fd79e8cb56
+//data8 0xcffa8425040624d7, 0x8e02b4418574ebed
+//data8 0x3fef72c3d2c57520, 0x3cd32a717f82203f
+//data8 0xd93299cddcf9cf23, 0x8af6ca48e9c44024
+//data8 0x3fefe762b77744c0, 0x3ce53478a6bbcf94
+//data8 0xe35eda760af69ad9, 0x87d1da0d7f45678b
+//data8 0x3ff02f511b223c00, 0x3ced6e11782c28fc
+//data8 0xeea6d733421da0a6, 0x84921bbe64ae029a
+//data8 0x3ff06c5c6f8ce9c0, 0x3ce71fc71c1ffc02
+//data8 0xfb3b2c73fc6195cc, 0x813589ba3a5651b6
+//data8 0x3ff0aaf2613700a0, 0x3cf2a72d2fd94ef3
+//data8 0x84ac1fcec4203245, 0xfb73a828893df19e
+//data8 0x3ff0eb367c3fd600, 0x3cf8054c158610de
+//data8 0x8ca50621110c60e6, 0xf438a14c158d867c
+//data8 0x3ff12d51caa6b580, 0x3ce6bce9748739b6
+//data8 0x95b8c2062d6f8161, 0xecb3ccdd37b369da
+//data8 0x3ff1717418520340, 0x3ca5c2732533177c
+//data8 0xa0262917caab4ad1, 0xe4dde4ddc81fd119
+//data8 0x3ff1b7d59dd40ba0, 0x3cc4c7c98e870ff5
+//data8 0xac402c688b72f3f4, 0xdcae469be46d4c8d
+//data8 0x3ff200b93cc5a540, 0x3c8dd6dc1bfe865a
+//data8 0xba76968b9eabd9ab, 0xd41a8f3df1115f7f
+//data8 0x3ff24c6f8f6affa0, 0x3cf1acb6d2a7eff7
+//data8 0xcb63c87c23a71dc5, 0xcb161074c17f54ec
+//data8 0x3ff29b5b338b7c80, 0x3ce9b5845f6ec746
+//data8 0xdfe323b8653af367, 0xc19107d99ab27e42
+//data8 0x3ff2edf6fac7f5a0, 0x3cf77f961925fa02
+//data8 0xf93746caaba3e1f1, 0xb777744a9df03bff
+//data8 0x3ff344df237486c0, 0x3cf6ddf5f6ddda43
+//data8 0x8ca77052f6c340f0, 0xacaf476f13806648
+//data8 0x3ff3a0dfa4bb4ae0, 0x3cfee01bbd761bff
+//data8 0xa1a48604a81d5c62, 0xa11575d30c0aae50
+//data8 0x3ff4030b73c55360, 0x3cf1cf0e0324d37c
+//data8 0xbe45074b05579024, 0x9478e362a07dd287
+//data8 0x3ff46ce4c738c4e0, 0x3ce3179555367d12
+//data8 0xe7a08b5693d214ec, 0x8690e3575b8a7c3b
+//data8 0x3ff4e0a887c40a80, 0x3cfbd5d46bfefe69
+//data8 0x94503d69396d91c7, 0xedd2ce885ff04028
+//data8 0x3ff561ebd9c18cc0, 0x3cf331bd176b233b
+//data8 0xced1d96c5bb209e6, 0xc965278083808702
+//data8 0x3ff5f71d7ff42c80, 0x3ce3301cc0b5a48c
+//data8 0xabac2cee0fc24e20, 0x9c4eb1136094cbbd
+//data8 0x3ff6ae4c63222720, 0x3cf5ff46874ee51e
+//data8 0x8040201008040201, 0xb4d7ac4d9acb1bf4
+//data8 0x3ff7b7d33b928c40, 0x3cfacdee584023bb
+LOCAL_OBJECT_END(T_table)
+
+
+
+.align 16
+// poly_coeffs: asin series coefficients plus pi/2 and pi; acosl walks it through r3 in 16-byte steps
+LOCAL_OBJECT_START(poly_coeffs)
+ // C_3 = 1/6 (16-byte extended format: 64-bit significand, then sign/exponent 0x3ffc; loaded with ldfe)
+data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
+ // C_5 = 3/40 (extended format, exponent 0x3ffb; loaded with ldfe)
+data8 0x999999999999999a, 0x0000000000003ffb
+ // C_7 ~= 5/112, C_9 ~= 35/1152 (pair of doubles; loaded with ldfpd)
+data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
+ // pi/2 (low, high), pair of doubles
+data8 0x3C91A62633145C07, 0x3FF921FB54442D18
+ // C_11 ~= 63/2816, C_13 ~= 231/13312 (pair of doubles)
+data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
+ // C_15 ~= 143/10240, C_17 ~= 6435/557056 (C_17's low bits differ from the exact fraction)
+data8 0x3f8c99999999999a, 0x3f87a87878787223
+ // pi (low, high), pair of doubles
+data8 0x3CA1A62633145C07, 0x400921FB54442D18
+LOCAL_OBJECT_END(poly_coeffs)
+
+// General-register aliases for acosl. Several names share one physical register (noted per line).
+R_DBL_S = r21 // s as a double bit pattern (getf.d of f8)
+R_EXP0 = r22 // exponent of s with the sign bit cleared
+R_EXP = r15 // sign/exponent of s (getf.exp of f8)
+R_SGNMASK = r23 // sign mask 0x20000
+R_TMP = r24 // scratch
+R_TMP2 = r25 // scratch
+R_INDEX = r26 // T_table index built from exponent bits and b1..b6 of s
+R_TMP3 = r27 // scratch; same register as R_TMP03
+R_TMP03 = r27 // scratch; same register as R_TMP3
+R_TMP4 = r28 // scratch
+R_TMP5 = r23 // scratch; shares r23 with R_SGNMASK
+R_TMP6 = r22 // scratch; shares r22 with R_EXP0
+R_TMP7 = r21 // scratch; shares r21 with R_DBL_S
+R_T = r29 // bit pattern of t (s truncated to b1..b6 followed by a 1 bit)
+R_BIAS = r20 // 0xc0 = 3*2^6, subtracted from R_INDEX
+// Floating-point aliases used by the main path of acosl
+F_T = f6 // t
+F_1S2 = f7 // 1-s^2
+F_1S2_S = f9 // presumably (1-s^2)_s from the header formula; its use is not visible here - confirm
+F_INV_1T2 = f10 // significand of 1/(1-t^2) from T_table, then scaled by 2^{-64} to 0.5/(1-t^2)
+F_SQRT_1T2 = f11 // significand of sqrt(1-t^2) from T_table, then scaled by 2^{-64}
+F_S2T2 = f12 // s+t, then s^2-t^2
+F_X = f13 // x = -(s^2-t^2)/(1-t^2)/2
+F_D = f14 // d = s-t
+F_2M64 = f15 // 2^{-64}
+
+F_CS2 = f32
+F_CS3 = f33
+F_CS4 = f34
+F_CS5 = f35
+F_CS6 = f36
+F_CS7 = f37
+F_CS8 = f38
+F_CS9 = f39
+F_S23 = f40
+F_S45 = f41
+F_S67 = f42
+F_S89 = f43
+F_S25 = f44
+F_S69 = f45
+F_S29 = f46
+F_X2 = f47
+F_X4 = f48
+F_TSQRT = f49
+F_DTX = f50
+F_R = f51
+F_R2 = f52
+F_R3 = f53
+F_R4 = f54
+
+F_C3 = f55
+F_C5 = f56
+F_C7 = f57
+F_C9 = f58
+F_P79 = f59
+F_P35 = f60
+F_P39 = f61
+
+F_ATHI = f62
+F_ATLO = f63
+
+F_T1 = f64
+F_Y = f65
+F_Y2 = f66
+F_ANDMASK = f67
+F_ORMASK = f68
+F_S = f69
+F_05 = f70
+F_SQRT_1S2 = f71
+F_DS = f72
+F_Z = f73
+F_1T2 = f74
+F_DZ = f75
+F_ZE = f76
+F_YZ = f77
+F_Y1S2 = f78
+F_Y1S2X = f79
+F_1X = f80
+F_ST = f81
+F_1T2_ST = f82
+F_TSS = f83
+F_Y1S2X2 = f84
+F_DZ_TERM = f85
+F_DTS = f86
+F_DS2X = f87
+F_T2 = f88
+F_ZY1S2S = f89
+F_Y1S2_1X = f90
+F_TS = f91
+F_PI2_LO = f92
+F_PI2_HI = f93
+F_S19 = f94
+F_INV1T2_2 = f95
+F_CORR = f96
+F_DZ0 = f97
+
+F_C11 = f98
+F_C13 = f99
+F_C15 = f100
+F_C17 = f101
+F_P1113 = f102
+F_P1517 = f103
+F_P1117 = f104
+F_P317 = f105
+F_R8 = f106
+F_HI = f107
+F_1S2_HI = f108
+F_DS2 = f109
+F_Y2_2 = f110
+//F_S2 = f111
+//F_S_DS2 = f112
+F_S_1S2S = f113
+F_XL = f114
+F_2M128 = f115
+F_1AS = f116
+F_AS = f117
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(acosl)
+
+{.mfi
+ // get exponent, mantissa (rounded to double precision) of s
+ getf.d R_DBL_S = f8
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // r2 = pointer to T_table
+ addl r2 = @ltoff(T_table), gp
+}
+
+{.mfi
+ // sign mask
+ mov R_SGNMASK = 0x20000
+ nop.f 0
+ // bias-63-1
+ mov R_TMP03 = 0xffff-64;;
+}
+
+
+{.mfi
+ // get exponent of s
+ getf.exp R_EXP = f8
+ nop.f 0
+ // R_TMP4 = 2^45
+ shl R_TMP4 = R_SGNMASK, 45-17
+}
+
+{.mlx
+ // load bias-4
+ mov R_TMP = 0xffff-4
+ // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
+ movl R_TMP2 = 0x7fcd413cccfe779a;;
+}
+
+
+{.mfi
+ // load 2^{-64} in FP register
+ setf.exp F_2M64 = R_TMP03
+ nop.f 0
+ // index = (0x7-exponent)|b1 b2.. b6
+ extr.u R_INDEX = R_DBL_S, 46, 9
+}
+
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 x.. x
+ or R_T = R_DBL_S, R_TMP4
+ nop.f 0
+ // R_TMP4 = 2^45-1
+ sub R_TMP4 = R_TMP4, r0, 1;;
+}
+
+
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 0.. 0
+ andcm R_T = R_T, R_TMP4
+ nop.f 0
+ // eliminate sign from R_DBL_S (shift left by 1)
+ shl R_TMP3 = R_DBL_S, 1
+}
+
+{.mfi
+ // R_BIAS = 3*2^6
+ mov R_BIAS = 0xc0
+ nop.f 0
+ // eliminate sign from R_EXP
+ andcm R_EXP0 = R_EXP, R_SGNMASK;;
+}
+
+
+
+{.mfi
+ // load start address for T_table
+ ld8 r2 = [r2]
+ nop.f 0
+ // p8 = 1 if |s|> = sqrt(2)/2
+ cmp.geu p8, p0 = R_TMP3, R_TMP2
+}
+
+{.mlx
+ // p7 = 1 if |s| < 2^{-4} (exponent of s < bias-4; R_TMP holds 0xffff-4)
+ cmp.lt p7, p0 = R_EXP0, R_TMP
+ // sqrt coefficient cs8 = -33*13/128 (single-precision bits; the old R_TMP2 was consumed by cmp.geu above)
+ movl R_TMP2 = 0xc0568000;;
+}
+
+
+{.mbb
+ // load t in FP register (F_T feeds d = s-t and s+t below)
+ setf.d F_T = R_T
+ // if |s| < 2^{-4}, take alternate path
+ (p7) br.cond.spnt SMALL_S
+ // if |s| > = sqrt(2)/2, take alternate path
+ (p8) br.cond.sptk LARGE_S
+}
+
+{.mlx
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ // sqrt coefficient cs9 = 55*13/128
+ movl R_TMP = 0x40b2c000;;
+}
+
+
+{.mfi
+ // sqrt coefficient cs8 = -33*13/128
+ setf.s F_CS8 = R_TMP2
+ nop.f 0
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5
+}
+
+{.mfi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ mov R_TMP4 = 0xffff - 1
+ nop.f 0
+ // sqrt coefficient cs6 = -21/16
+ mov R_TMP6 = 0xbfa8;;
+}
+
+
+{.mlx
+ // table index
+ add r2 = r2, R_INDEX
+ // sqrt coefficient cs7 = 33/16
+ movl R_TMP2 = 0x40040000;;
+}
+
+
+{.mmi
+ // load cs9 = 55*13/128
+ setf.s F_CS9 = R_TMP
+ // sqrt coefficient cs5 = 7/8
+ mov R_TMP3 = 0x3f60
+ // sqrt coefficient cs6 = 21/16
+ shl R_TMP6 = R_TMP6, 16;;
+}
+
+
+{.mmi
+ // load significand of 1/(1-t^2)
+ ldf8 F_INV_1T2 = [r2], 8
+ // sqrt coefficient cs7 = 33/16
+ setf.s F_CS7 = R_TMP2
+ // sqrt coefficient cs4 = -5/8
+ mov R_TMP5 = 0xbf20;;
+}
+
+
+{.mmi
+ // load significand of sqrt(1-t^2)
+ ldf8 F_SQRT_1T2 = [r2], 8
+ // sqrt coefficient cs6 = 21/16
+ setf.s F_CS6 = R_TMP6
+ // sqrt coefficient cs5 = 7/8
+ shl R_TMP3 = R_TMP3, 16;;
+}
+
+
+{.mmi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ setf.exp F_CS3 = R_TMP4
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // sqrt coefficient cs4 = -5/8
+ shl R_TMP5 = R_TMP5, 16;;
+}
+
+
+{.mfi
+ // sqrt coefficient cs5 = 7/8
+ setf.s F_CS5 = R_TMP3
+ // d = s-t
+ fms.s1 F_D = f8, f1, F_T
+ // set p6 = 1 if s<0, p11 = 1 if s> = 0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
+}
+
+{.mfi
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // s+t
+ fma.s1 F_S2T2 = f8, f1, F_T
+ nop.i 0;;
+}
+
+
+{.mfi
+ // sqrt coefficient cs4 = -5/8
+ setf.s F_CS4 = R_TMP5
+ // s^2-t^2
+ fma.s1 F_S2T2 = F_S2T2, F_D, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+{.mfi
+ // load C_5
+ ldfe F_C5 = [r3], 16
+ // set correct exponent for sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load C_7, C_9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // x = -(s^2-t^2)/(1-t^2)/2
+ fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
+ nop.i 0;;
+}
+
+
+{.mmf
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // load pi/2
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // t*sqrt(1-t^2)
+ fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // cs9*x+cs8
+ fma.s1 F_S89 = F_CS9, F_X, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // cs7*x+cs6
+ fma.s1 F_S67 = F_CS7, F_X, F_CS6
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // cs5*x+cs4
+ fma.s1 F_S45 = F_CS5, F_X, F_CS4
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x*x
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (s-t)-t*x
+ fnma.s1 F_DTX = F_T, F_X, F_D
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // cs3*x+cs2 (cs2 = -0.5 = -cs3)
+ fms.s1 F_S23 = F_CS3, F_X, F_CS3
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_low
+ (p6) fnma.s1 F_ATLO = F_ATLO, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_high
+ (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // cs9*x^3+cs8*x^2+cs7*x+cs6
+ fma.s1 F_S69 = F_S89, F_X2, F_S67
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // t*sqrt(1-t^2)*x^2
+ fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S25 = F_S45, F_X2, F_S23
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // ((s-t)-t*x)*sqrt(1-t^2)
+ fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // (pi/2)_high - asin(t)_high
+ fnma.s1 F_ATHI = F_ATHI, f1, F_PI2_HI
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // asin(t)_low - (pi/2)_low
+ fnma.s1 F_ATLO = F_PI2_LO, f1, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S29 = F_S69, F_X4, F_S25
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
+ fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, f1, F_R
+ nop.i 0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // result = (pi/2)-asin(t)_high+R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fnma.s0 f8 = F_P39, f1, F_ATHI
+ // return
+ br.ret.sptk b0;;
+}
+
+
+
+
+LARGE_S:
+
+{.mfi
+ // bias-1
+ mov R_TMP3 = 0xffff - 1
+ // y ~ 1/sqrt(1-s^2)
+ frsqrta.s1 F_Y, p7 = F_1S2
+ // c9 = 55*13*17/128
+ mov R_TMP4 = 0x10af7b
+}
+
+{.mlx
+ // c8 = -33*13*15/128
+ mov R_TMP5 = 0x184923
+ movl R_TMP2 = 0xff00000000000000;;
+}
+
+{.mfi
+ // set p6 = 1 if s<0, p11 = 1 if s>0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // set p9 = 1
+ cmp.eq p9, p0 = r0, r0;;
+}
+
+
+{.mfi
+ // load 0.5
+ setf.exp F_05 = R_TMP3
+ // (1-s^2) rounded to single precision
+ fnma.s.s1 F_1S2_S = f8, f8, f1
+ // c9 = 55*13*17/128
+ shl R_TMP4 = R_TMP4, 10
+}
+
+{.mlx
+ // AND mask for getting t ~ sqrt(1-s^2)
+ setf.sig F_ANDMASK = R_TMP2
+ // OR mask
+ movl R_TMP2 = 0x0100000000000000;;
+}
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // 1-|s|
+ (p6) fma.s1 F_1AS = f8, f1, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // 1-|s|
+ (p11) fnma.s1 F_1AS = f8, f1, f1
+ nop.i 0;;
+}
+
+
+{.mfi
+ // c9 = 55*13*17/128
+ setf.s F_CS9 = R_TMP4
+ // |s|
+ (p6) fnma.s1 F_AS = f8, f1, f0
+ // c8 = -33*13*15/128
+ shl R_TMP5 = R_TMP5, 11
+}
+
+{.mfi
+ // c7 = 33*13/16
+ mov R_TMP4 = 0x41d68
+ // |s|
+ (p11) fma.s1 F_AS = f8, f1, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ setf.sig F_ORMASK = R_TMP2
+ // y^2
+ fma.s1 F_Y2 = F_Y, F_Y, f0
+ // c7 = 33*13/16
+ shl R_TMP4 = R_TMP4, 12
+}
+
+{.mfi
+ // c6 = -33*7/16
+ mov R_TMP6 = 0xc1670
+ // y' ~ sqrt(1-s^2)
+ fma.s1 F_T1 = F_Y, F_1S2, f0
+ // c5 = 63/8
+ mov R_TMP7 = 0x40fc;;
+}
+
+
+{.mlx
+ // load c8 = -33*13*15/128
+ setf.s F_CS8 = R_TMP5
+ // c4 = -35/8
+ movl R_TMP5 = 0xc08c0000;;
+}
+
+{.mfi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // 1-s-(1-s^2)_s
+ fnma.s1 F_DS = F_1S2_S, f1, F_1AS
+ // p9 = 0 if p7 = 1 (p9 = 1 for special cases only)
+ (p7) cmp.ne p9, p0 = r0, r0
+}
+
+{.mlx
+ // load c7 = 33*13/16
+ setf.s F_CS7 = R_TMP4
+ // c3 = 5/2
+ movl R_TMP4 = 0x40200000;;
+}
+
+
+{.mlx
+ // load c4 = -35/8
+ setf.s F_CS4 = R_TMP5
+ // c2 = -3/2
+ movl R_TMP5 = 0xbfc00000;;
+}
+
+
+{.mfi
+ // load c3 = 5/2
+ setf.s F_CS3 = R_TMP4
+ // x = (1-s^2)_s*y^2-1
+ fms.s1 F_X = F_1S2_S, F_Y2, f1
+ // c6 = -33*7/16
+ shl R_TMP6 = R_TMP6, 12
+}
+
+{.mfi
+ nop.m 0
+ // y^2/2
+ fma.s1 F_Y2_2 = F_Y2, F_05, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load c6 = -33*7/16
+ setf.s F_CS6 = R_TMP6
+ // eliminate lower bits from y'
+ fand F_T = F_T1, F_ANDMASK
+ // c5 = 63/8
+ shl R_TMP7 = R_TMP7, 16
+}
+
+
+{.mfb
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // 1-(1-s^2)_s-s^2
+ fma.s1 F_DS = F_AS, F_1AS, F_DS
+ // p9 = 1 if s is a special input (NaN, or |s|> = 1)
+ (p9) br.cond.spnt acosl_SPECIAL_CASES;;
+}
+
+{.mmf
+ // get exponent, significand of y' (in single prec.)
+ getf.s R_TMP = F_T1
+ // load c2 = -3/2
+ setf.s F_CS2 = R_TMP5
+ // y*(1-s^2)
+ fma.s1 F_Y1S2 = F_Y, F_1S2, f0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // if s<0, set s = -s
+ (p6) fnma.s1 f8 = f8, f1, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load c5 = 63/8
+ setf.s F_CS5 = R_TMP7
+ // x = (1-s^2)_s*y^2-1+(1-(1-s^2)_s-s^2)*y^2
+ fma.s1 F_X = F_DS, F_Y2, F_X
+ // for t = 2^k*1.b1 b2.., get 7-k|b1.. b6
+ extr.u R_INDEX = R_TMP, 17, 9;;
+}
+
+
+{.mmi
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ nop.m 0
+ // get exponent of y
+ shr.u R_TMP2 = R_TMP, 23;;
+}
+
+{.mmi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // set p8 = 1 if y'<2^{-4}
+ cmp.gt p8, p0 = 0x7b, R_TMP2
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5;;
+}
+
+
+{.mfb
+ // get table index for sqrt(1-t^2)
+ add r2 = r2, R_INDEX
+ // get t = 2^k*1.b1 b2.. b7 1
+ for F_T = F_T, F_ORMASK
+ (p8) br.cond.spnt VERY_LARGE_INPUT;;
+}
+
+
+
+{.mmf
+ // load C5
+ ldfe F_C5 = [r3], 16
+ // load 1/(1-t^2)
+ ldfp8 F_INV_1T2, F_SQRT_1T2 = [r2], 16
+ // x = ((1-s^2)*y^2-1)/2
+ fma.s1 F_X = F_X, F_05, f0;;
+}
+
+
+
+{.mmf
+ nop.m 0
+ // C7, C9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // set correct exponent for t
+ fmerge.se F_T = F_T1, F_T;;
+}
+
+
+
+{.mfi
+ // get address for loading pi
+ add r3 = 48, r3
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // pi (low, high)
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-x
+ fnma.s1 F_1X = F_X, f1, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-t^2
+ fnma.s1 F_1T2 = F_T, F_T, f1
+ nop.i 0
+}
+
+{.mfi
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // t*s
+ fma.s1 F_TS = F_T, f8, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // 0.5/(1-t^2)
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // z~sqrt(1-t^2), rounded to 24 significant bits
+ fma.s.s1 F_Z = F_SQRT_1T2, F_2M64, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // s*t rounded to 24 significant bits
+ fma.s.s1 F_TSS = F_T, f8, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // ST = (t^2-1+s^2) rounded to 24 significant bits
+ fms.s.s1 F_ST = f8, f8, F_1T2
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 0.25/(1-t^2)
+ fma.s1 F_INV1T2_2 = F_05, F_INV_1T2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // t*s-sqrt(1-t^2)*(1-s^2)*y
+ fnma.s1 F_TS = F_Y1S2, F_SQRT_1T2, F_TS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z*0.5/(1-t^2)
+ fma.s1 F_ZE = F_INV_1T2, F_SQRT_1T2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // z^2+t^2-1
+ fms.s1 F_DZ0 = F_Z, F_Z, F_1T2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DS2X = F_X, F_DS, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // t*s-(t*s)_s
+ fms.s1 F_DTS = F_T, f8, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*z
+ fma.s1 F_YZ = F_Z, F_Y, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // t^2
+ fma.s1 F_T2 = F_T, F_T, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-t^2+ST
+ fma.s1 F_1T2_ST = F_ST, f1, F_1T2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)(1-x)
+ fma.s1 F_Y1S2_1X = F_Y1S2, F_1X, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // dz ~ sqrt(1-t^2)-z
+ fma.s1 F_DZ = F_DZ0, F_ZE, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // -1+correction for sqrt(1-t^2)-z
+ fnma.s1 F_CORR = F_INV1T2_2, F_DZ0, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // z*y*(1-s^2)_s
+ fma.s1 F_ZY1S2S = F_YZ, F_1S2_S, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // s^2-(1-t^2+ST)
+ fms.s1 F_1T2_ST = f8, f8, F_1T2_ST
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DTS = F_YZ, F_DS2X, F_DTS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ, F_Y1S2_1X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R = t*s-sqrt(1-t^2)*(1-s^2)*y+sqrt(1-t^2)*(1-s^2)*y*PS19
+ // (used for polynomial evaluation)
+ fma.s1 F_R = F_S19, F_SQRT_1T2, F_TS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // apply correction to dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, F_CORR, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x+dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, f1, F_DTS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // asin(t)_low-(pi)_low (if s<0)
+ (p6) fms.s1 F_ATLO = F_ATLO, f1, F_PI2_LO
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z
+ fnma.s1 F_TSS = F_T2, F_YZ, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)
+ fma.s1 F_DZ_TERM = F_YZ, F_1T2_ST, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi)_hi-asin(t)_hi (if s<0)
+ (p6) fms.s1 F_ATHI = F_PI2_HI, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)+
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29
+ fma.s1 F_DZ_TERM = F_SQRT_1T2, F_S29, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_TSS = F_YZ, F_ST, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // -asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fms.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_P39, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_ZY1S2S, F_X, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6) +
+ // + (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_DZ_TERM = F_TSS, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // result: add high part of table value
+ // s>0 in this case
+ (p11) fnma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // result: add high part of pi-table value
+ // if s<0
+ (p6) fma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+SMALL_S:
+
+ // use 15-term polynomial approximation
+
+{.mmi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp;;
+ // load start address for coefficients
+ ld8 r3 = [r3]
+ mov R_TMP = 0x3fbf;;
+}
+
+
+{.mmi
+ add r2 = 64, r3
+ ldfe F_C3 = [r3], 16
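+ // if s >= 0 (p11), set F_PI2_HI = 0 -- NOTE(review): the text before this line looks truncated (no p7 compare, no VERY_LARGE_INPUT label), restore it from upstream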
+ (p11) fma.s1 F_PI2_HI = f0, f0, f0;;
+}
+
+{.mfi
+ nop.m 0
+ (p11) fma.s1 F_PI2_LO = f0, f0, f0
+ nop.i 0;;
+}
+
+{.mfi
+ // adjust address for C_11
+ add r3 = 16, r3
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
+}
+
+{.mfi
+ // C11, C13
+ ldfpd F_C11, F_C13 = [r3], 16
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
+}
+
+
+{.mfi
+ // C15, C17
+ ldfpd F_C15, F_C17 = [r3], 16
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
+}
+
+
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // (pi)_high-y*(1-s^2)_s
+ fnma.s1 F_HI = F_Y, F_1S2_S, F_PI2_HI
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // -(y*(1-s^2)_s)_high
+ fms.s1 F_1S2_HI = F_HI, f1, F_PI2_HI
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)_s-(y*(1-s^2))_high
+ fma.s1 F_DS2 = F_Y, F_1S2_S, F_1S2_HI
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R ~ sqrt(1-s^2)
+ // (used for polynomial evaluation)
+ fnma.s1 F_R = F_S19, f1, F_Y1S2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)-(y*(1-s^2))_high
+ fma.s1 F_DS2 = F_Y, F_DS, F_DS2
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // (pi)_low+(PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, F_PI2_LO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if s<0
+ // (pi)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)
+ fms.s1 F_S29 = F_S29, f1, F_DS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c11+c13*R^2
+ fma.s1 F_P1113 = F_C13, F_R2, F_C11
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c15+c17*R^2
+ fma.s1 F_P1517 = F_C17, F_R2, F_C15
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi)_low+(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-(y*(1-s^2))_high)+y*(1-s^2)*x
+ fma.s1 F_S29 = F_Y1S2, F_X, F_S29
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c11+c13*R^2+c15*R^4+c17*R^6
+ fma.s1 F_P1117 = F_P1517, F_R4, F_P1113
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // R^8
+ fma.s1 F_R8 = F_R4, F_R4, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6+..+c17*R^14
+ fma.s1 F_P317 = F_P1117, F_R8, F_P39
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
+ // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
+ fnma.s1 F_S29 = F_P317, F_R3, F_S29
+ nop.i 0;;
+}
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // Result (if s<0):
+ // (pi)_low-(PS29*x^2)*y*(1-s^2)-(y*(1-s^2)-
+ // -(y*(1-s^2))_high)+y*(1-s^2)*x - P3, 17
+ // +(pi)_high-(y*(1-s^2))_high
+ (p6) fma.s0 f8 = F_S29, f1, F_HI
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // Result (if s>0):
+ // (PS29*x^2)*y*(1-s^2)-
+ // -y*(1-s^2)*x + P3, 17
+ // +(y*(1-s^2))
+ (p11) fms.s0 f8 = F_Y, F_1S2_S, F_S29
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+acosl_SPECIAL_CASES:
+// Special inputs (branched to from LARGE_S when p9 is set): NaN/unsupported, |s| = 1, |s| > 1
+{.mfi
+ alloc r32 = ar.pfs, 1, 4, 4, 0
+ // check if the input is a NaN, or unsupported format
+ // (i.e. not infinity or normal/denormal)
+ fclass.nm p7, p8 = f8, 0x3f
+ // pointer to pi: r3 + 96 is the same poly_coeffs offset the LARGE_S path loads pi from
+ add r3 = 96, r3;;
+}
+
+
+{.mfi
+ // load pi as two doubles (LARGE_S reads this pair as low, high; only the sum is used here)
+ ldfpd F_PI2_HI, F_PI2_LO = [r3]
+ // get |s|
+ fmerge.s F_S = f0, f8
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // if NaN, quietize it, and return
+ (p7) fma.s0 f8 = f8, f1, f0
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // p9 = 1 if |s| = 1, p10 = 1 otherwise
+ fcmp.eq.s0 p9, p10 = F_S, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // copy the input to FR_X (stored as Parameter 1 by __libm_error_region)
+ fma.s1 FR_X = f8, f1, f0
+ // load error tag (presumably the acosl domain-error code, confirm against __libm_error_support)
+ mov GR_Parameter_TAG = 57;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if |s| = 1, result is 0 (overwritten with pi below when s = -1)
+ (p9) fma.s0 f8 = f0, f0, f0
+ // clear p6 when |s| is not 1, so that p6 now means s = -1
+ (p10) cmp.ne p6, p0 = r0, r0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // if s = -1, result is pi
+ (p6) fma.s0 f8 = F_PI2_HI, f1, F_PI2_LO
+ // return if |s| = 1
+ (p9) br.ret.sptk b0;;
+}
+
+// |s| > 1 (domain error) from here on
+{.mfi
+ nop.m 0
+ // get Infinity
+ frcpa.s1 FR_RESULT, p0 = f1, f0
+ nop.i 0;;
+}
+
+// no branch follows: the next code in the file is __libm_error_region
+{.mfb
+ nop.m 0
+ // return QNaN indefinite (0*Infinity)
+ fma.s0 FR_RESULT = f0, FR_RESULT, f0
+ nop.b 0;;
+}
+
+
+GLOBAL_IEEE754_END(acosl)
+libm_alias_ldouble_other (__acos, acos)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// (1) Save ar.pfs and gp, and open a 64-byte stack frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack frame (64 bytes)
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+// Frame layout from here on: sp+16 = Parameter 1, sp+32 = Parameter 2, sp+48 = Parameter 3 (result)
+// (2) Store Parameter 2 and save the return address
+{ .mmi
+ stfe [GR_Parameter_Y] = f1,16 // Store Parameter 2 (f1) on stack, advance to the Parameter 3 slot
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Store Parameters 1 and 3, then call the error handler
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the Parameter 3 address after the call
+};;
+
+// (4) Reload the result, then restore sp, b0, gp and ar.pfs
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asin.S b/sysdeps/ia64/fpu/e_asin.S
new file mode 100644
index 0000000000..f9ae279609
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_asin.S
@@ -0,0 +1,854 @@
+.file "asin.s"
+
+
+// Copyright (c) 2000 - 2003 Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 08/17/00 New and much faster algorithm.
+// 08/31/00 Avoided bank conflicts on loads, shortened |x|=1 path,
+// fixed mfb split issue stalls.
+// 12/19/00 Fixed small arg cases to force inexact, or inexact and underflow.
+// 08/02/02 New and much faster algorithm II
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+
+// Description
+//=========================================
+// The asin function computes the principal value of the arc sine of x.
+// asin(0) returns 0, asin(1) returns pi/2, asin(-1) returns -pi/2.
+// A domain error occurs for arguments not in the range [-1,+1].
+//
+// The asin function returns the arc sine in the range [-pi/2, +pi/2] radians.
+//
+// There are 8 paths:
+// 1. x = +/-0.0
+// Return asin(x) = +/-0.0
+//
+// 2. 0.0 < |x| < 0.625
+// Return asin(x) = x + x^3 *PolA(x^2)
+// where PolA(x^2) = A3 + A5*x^2 + A7*x^4 +...+ A35*x^32
+//
+// 3. 0.625 <=|x| < 1.0
+// Return asin(x) = sign(x) * ( Pi/2 - sqrt(R) * PolB(R))
+// Where R = 1 - |x|,
+// PolB(R) = B0 + B1*R + B2*R^2 +...+B12*R^12
+//
+// sqrt(R) is approximated using the following sequence:
+// y0 = (1 + eps)/sqrt(R) - initial approximation by frsqrta,
+// |eps| < 2^(-8)
+// Then 3 iterations are used to refine the result:
+// H0 = 0.5*y0
+// S0 = R*y0
+//
+// d0 = 0.5 - H0*S0
+// H1 = H0 + d0*H0
+// S1 = S0 + d0*S0
+//
+// d1 = 0.5 - H1*S1
+// H2 = H1 + d1*H1
+// S2 = S1 + d1*S1
+//
+// d2 = 0.5 - H2*S2
+// S3 = S2 + d2*S2
+//
+// S3 approximates sqrt(R) with enough accuracy for this algorithm
+//
+// So, the result should be reconstructed as follows:
+// asin(x) = sign(x) * (Pi/2 - S3*PolB(R))
+//
+// But for optimization purposes the reconstruction step is slightly
+// changed:
+// asin(x) = sign(x)*(Pi/2 - PolB(R)*S2) + sign(x)*d2*S2*PolB(R)
+//
+// 4. |x| = 1.0
+// Return asin(x) = sign(x)*Pi/2
+//
+// 5. 1.0 < |x| <= +INF
+// A domain error occurs for arguments not in the range [-1,+1]
+//
+// 6. x = [S,Q]NaN
+// Return asin(x) = QNaN
+//
+// 7. x is denormal
+// Return asin(x) = x + x^3,
+//
+// 8. x is unnormal
+// Normalize input in f8 and return to the very beginning of the function
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6, f7, f9 -> f15, f32 -> f63
+
+// General registers used:
+// r3, r21 -> r31, r32 -> r38
+
+// Predicate registers used:
+// p0, p6 -> p14
+
+//
+// Assembly macros
+//=========================================
+// integer registers used
+// scratch
+rTblAddr = r3
+
+rPiBy2Ptr = r21
+rTmpPtr3 = r22
+rDenoBound = r23
+rOne = r24
+rAbsXBits = r25
+rHalf = r26
+r0625 = r27
+rSign = r28
+rXBits = r29
+rTmpPtr2 = r30
+rTmpPtr1 = r31
+
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+
+// scratch
+fXSqr = f6
+fXCube = f7
+fXQuadr = f9
+f1pX = f10
+f1mX = f11
+f1pXRcp = f12
+f1mXRcp = f13
+fH = f14
+fS = f15
+// stacked
+fA3 = f32
+fB1 = f32
+fA5 = f33
+fB2 = f33
+fA7 = f34
+fPiBy2 = f34
+fA9 = f35
+fA11 = f36
+fB10 = f35
+fB11 = f36
+fA13 = f37
+fA15 = f38
+fB4 = f37
+fB5 = f38
+fA17 = f39
+fA19 = f40
+fB6 = f39
+fB7 = f40
+fA21 = f41
+fA23 = f42
+fB3 = f41
+fB8 = f42
+fA25 = f43
+fA27 = f44
+fB9 = f43
+fB12 = f44
+fA29 = f45
+fA31 = f46
+fA33 = f47
+fA35 = f48
+fBaseP = f49
+fB0 = f50
+fSignedS = f51
+fD = f52
+fHalf = f53
+fR = f54
+fCloseTo1Pol = f55
+fSignX = f56
+fDenoBound = f57
+fNormX = f58
+fX8 = f59
+fRSqr = f60
+fRQuadr = f61
+fR8 = f62
+fX16 = f63
+// Data tables
+//==============================================================
+RODATA
+.align 16
+LOCAL_OBJECT_START(asin_base_range_table)
+// Ai: polynomial coefficients for asin(x), |x| < 0.625. Bi: polynomial coefficients for |x| >= 0.625.
+// Offsets are bytes from the table start. 16-byte entries are read with ldfe, 8-byte entries in pairs with ldfpd.
+data8 0xBFDAAB56C01AE468 //A29 (offset 0)
+data8 0x3FE1C470B76A5B2B //A31 (offset 8)
+data8 0xBFDC5FF82A0C4205 //A33 (offset 16)
+data8 0x3FC71FD88BFE93F0 //A35 (offset 24)
+data8 0xB504F333F9DE6487, 0x00003FFF //B0 (offset 32)
+data8 0xAAAAAAAAAAAAFC18, 0x00003FFC //A3 (offset 48, start of the |x| < 0.625 loads)
+data8 0x3F9F1C71BC4A7823 //A9 (offset 64)
+data8 0x3F96E8BBAAB216B2 //A11 (offset 72)
+data8 0x3F91C4CA1F9F8A98 //A13 (offset 80)
+data8 0x3F8C9DDCEDEBE7A6 //A15 (offset 88)
+data8 0x3F877784442B1516 //A17 (offset 96)
+data8 0x3F859C0491802BA2 //A19 (offset 104)
+data8 0x9999999998C88B8F, 0x00003FFB //A5 (offset 112)
+data8 0x3F6BD7A9A660BF5E //A21 (offset 128)
+data8 0x3F9FC1659340419D //A23 (offset 136)
+data8 0xB6DB6DB798149BDF, 0x00003FFA //A7 (offset 144)
+data8 0xBFB3EF18964D3ED3 //A25 (offset 160)
+data8 0x3FCD285315542CF2 //A27 (offset 168)
+data8 0xF15BEEEFF7D2966A, 0x00003FFB //B1 (offset 176, start of the |x| >= 0.625 loads)
+data8 0x3EF0DDA376D10FB3 //B10 (offset 192)
+data8 0xBEB83CAFE05EBAC9 //B11 (offset 200)
+data8 0x3F65FFB67B513644 //B4 (offset 208)
+data8 0x3F5032FBB86A4501 //B5 (offset 216)
+data8 0x3F392162276C7CBA //B6 (offset 224)
+data8 0x3F2435949FD98BDF //B7 (offset 232)
+data8 0xD93923D7FA08341C, 0x00003FF9 //B2 (offset 240)
+data8 0x3F802995B6D90BDB //B3 (offset 256)
+data8 0x3F10DF86B341A63F //B8 (offset 264)
+data8 0xC90FDAA22168C235, 0x00003FFF // Pi/2 (offset 272, matches rPiBy2Ptr)
+data8 0x3EFA3EBD6B0ECB9D //B9 (offset 288)
+data8 0x3EDE18BA080E9098 //B12 (offset 296)
+LOCAL_OBJECT_END(asin_base_range_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(asin)
+asin_unnormal_back:
+{ .mfi
+ getf.d rXBits = f8 // grab bits of input value
+ // set p12 = 1 if x is a NaN, denormal, or zero
+ fclass.m p12, p0 = f8, 0xcf
+ adds rSign = 1, r0
+}
+{ .mfi
+ addl rTblAddr = @ltoff(asin_base_range_table),gp
+ // 1 - x = 1 - |x| for positive x
+ fms.s1 f1mX = f1, f1, f8
+ addl rHalf = 0xFFFE, r0 // exponent of 1/2
+}
+;;
+{ .mfi
+ addl r0625 = 0x3FE4, r0 // high 16 bits of 0.625
+ // set p8 = 1 if x < 0
+ fcmp.lt.s1 p8, p9 = f8, f0
+ shl rSign = rSign, 63 // sign bit
+}
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ // 1 + x = 1 - |x| for negative x
+ fma.s1 f1pX = f1, f1, f8
+ adds rOne = 0x3FF, r0
+}
+;;
+{ .mfi
+ andcm rAbsXBits = rXBits, rSign // bits of |x|
+ fmerge.s fSignX = f8, f1 // signum(x)
+ shl r0625 = r0625, 48 // bits of DP representation of 0.625
+}
+{ .mfb
+ setf.exp fHalf = rHalf // load A2 to FP reg
+ fma.s1 fXSqr = f8, f8, f0 // x^2
+ // branch on special path if x is a NaN, denormal, or zero
+(p12) br.cond.spnt asin_special
+}
+;;
+{ .mfi
+ adds rPiBy2Ptr = 272, rTblAddr
+ nop.f 0
+ shl rOne = rOne, 52 // bits of 1.0
+}
+{ .mfi
+ adds rTmpPtr1 = 16, rTblAddr
+ nop.f 0
+ // set p6 = 1 if |x| < 0.625
+ cmp.lt p6, p7 = rAbsXBits, r0625
+}
+;;
+{ .mfi
+ ldfpd fA29, fA31 = [rTblAddr] // A29, fA31
+ // 1 - x = 1 - |x| for positive x
+(p9) fms.s1 fR = f1, f1, f8
+ // point to coefficient of "near 1" polynomial
+(p7) adds rTmpPtr2 = 176, rTblAddr
+}
+{ .mfi
+ ldfpd fA33, fA35 = [rTmpPtr1], 16 // A33, fA35
+ // 1 + x = 1 - |x| for negative x
+(p8) fma.s1 fR = f1, f1, f8
+(p6) adds rTmpPtr2 = 48, rTblAddr
+}
+;;
+{ .mfi
+ ldfe fB0 = [rTmpPtr1], 16 // B0
+ nop.f 0
+ nop.i 0
+}
+{ .mib
+ adds rTmpPtr3 = 16, rTmpPtr2
+ // set p10 = 1 if |x| = 1.0
+ cmp.eq p10, p0 = rAbsXBits, rOne
+ // branch on special path for |x| = 1.0
+(p10) br.cond.spnt asin_abs_1
+}
+;;
+{ .mfi
+ ldfe fA3 = [rTmpPtr2], 48 // A3 or B1
+ nop.f 0
+ adds rTmpPtr1 = 64, rTmpPtr3
+}
+{ .mib
+ ldfpd fA9, fA11 = [rTmpPtr3], 16 // A9, A11 or B10, B11
+ // set p11 = 1 if |x| > 1.0
+ cmp.gt p11, p0 = rAbsXBits, rOne
+ // branch on special path for |x| > 1.0
+(p11) br.cond.spnt asin_abs_gt_1
+}
+;;
+{ .mfi
+ ldfpd fA17, fA19 = [rTmpPtr2], 16 // A17, A19 or B6, B7
+ // initial approximation of 1 / sqrt(1 - x)
+ frsqrta.s1 f1mXRcp, p0 = f1mX
+ nop.i 0
+}
+{ .mfi
+ ldfpd fA13, fA15 = [rTmpPtr3] // A13, A15 or B4, B5
+ fma.s1 fXCube = fXSqr, f8, f0 // x^3
+ nop.i 0
+}
+;;
+{ .mfi
+ ldfe fA5 = [rTmpPtr2], 48 // A5 or B2
+ // initial approximation of 1 / sqrt(1 + x)
+ frsqrta.s1 f1pXRcp, p0 = f1pX
+ nop.i 0
+}
+{ .mfi
+ ldfpd fA21, fA23 = [rTmpPtr1], 16 // A21, A23 or B3, B8
+ fma.s1 fXQuadr = fXSqr, fXSqr, f0 // x^4
+ nop.i 0
+}
+;;
+{ .mfi
+ ldfe fA7 = [rTmpPtr1] // A7 or Pi/2
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+ nop.i 0
+}
+{ .mfb
+ ldfpd fA25, fA27 = [rTmpPtr2] // A25, A27 or B9, B12
+ nop.f 0
+(p6) br.cond.spnt asin_base_range;
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fH = fHalf, f1mXRcp, f0 // H0 for x > 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p9) fma.s1 fS = f1mX, f1mXRcp, f0 // S0 for x > 0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fH = fHalf, f1pXRcp, f0 // H0 for x < 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p8) fma.s1 fS = f1pX, f1pXRcp, f0 // S0 for x > 0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fRQuadr = fRSqr, fRSqr, f0 // R^4
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB11 = fB11, fR, fB10
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB1 = fB1, fR, fB0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB5 = fB5, fR, fB4
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fR, fB6
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fR, fB2
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d0 = 1/2 - H0*S0
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fR8 = fRQuadr, fRQuadr, f0 // R^8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB9 = fB9, fR, fB8
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB11
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRSqr, fB5
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fB3 = fB3, fRSqr, fB1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H1 = H0 + H0*d0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S1 = S0 + S0*d0
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fma.s1 fPiBy2 = fPiBy2, fSignX, f0 // signum(x)*Pi/2
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fB12 = fB12, fRSqr, fB9
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fB7 = fB7, fRQuadr, fB3
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d1 = 1/2 - H1*S1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fSignedS = fSignX, fS, f0 // -signum(x)*S1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fCloseTo1Pol = fB12, fR8, fB7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fH = fH, fD, fH // H2 = H1 + H1*d1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS, fD, fS // S2 = S1 + S1*d1
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // -signum(x)* S2 = -signum(x)*(S1 + S1*d1)
+ fma.s1 fSignedS = fSignedS, fD, fSignedS
+ nop.i 0
+}
+;;
+{.mfi
+ nop.m 0
+ fnma.s1 fD = fH, fS, fHalf // d2 = 1/2 - H2*S2
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // signum(x)*(Pi/2 - PolB*S2)
+ fma.s1 fPiBy2 = fSignedS, fCloseTo1Pol, fPiBy2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // -signum(x)*PolB * S2
+ fma.s1 fCloseTo1Pol = fSignedS, fCloseTo1Pol, f0
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for 0.625 <= |x| < 1
+ fma.d.s0 f8 = fCloseTo1Pol, fD, fPiBy2
+ // exit here for 0.625 <= |x| < 1
+ br.ret.sptk b0
+}
+;;
+
+
+// here if |x| < 0.625
+.align 32
+asin_base_range:
+{ .mfi
+ nop.m 0
+ fma.s1 fA33 = fA33, fXSqr, fA31
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA15 = fA15, fXSqr, fA13
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA29 = fA29, fXSqr, fA27
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXSqr, fA23
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fA21, fXSqr, fA19
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXSqr, fA7
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA5 = fA5, fXSqr, fA3
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA33
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXQuadr, fA15
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX8 = fXQuadr, fXQuadr, f0 // x^8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA25 = fA25, fXQuadr, fA21
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA9 = fA9, fXQuadr, fA5
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fXQuadr, fA29
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fXSqr, fA11
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fX16 = fX8, fX8, f0 // x^16
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fA35 = fA35, fX8, fA25
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA17 = fA17, fX8, fA9
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ fma.s1 fBaseP = fA35, fX16, fA17
+ nop.i 0
+}
+;;
+{ .mfb
+ nop.m 0
+ // final result for |x| < 0.625
+ fma.d.s0 f8 = fBaseP, fXCube, f8
+ // exit here for |x| < 0.625 path
+ br.ret.sptk b0
+}
+;;
+
+// here if |x| = 1
+// asin(x) = sign(x) * Pi/2
+.align 32
+asin_abs_1:
+{ .mfi
+ ldfe fPiBy2 = [rPiBy2Ptr] // Pi/2
+ nop.f 0
+ nop.i 0
+}
+;;
+{.mfb
+ nop.m 0
+ // result for |x| = 1.0
+ fma.d.s0 f8 = fPiBy2, fSignX, f0
+ // exit here for |x| = 1.0
+ br.ret.sptk b0
+}
+;;
+
+// here if x is a NaN, denormal, or zero
+.align 32
+asin_special:
+{ .mfi
+ nop.m 0
+ // set p12 = 1 if x is a NaN
+ fclass.m p12, p0 = f8, 0xc3
+ nop.i 0
+}
+{ .mlx
+ nop.m 0
+ // smallest positive DP normalized number
+ movl rDenoBound = 0x0010000000000000
+}
+;;
+{ .mfi
+ nop.m 0
+ // set p13 = 1 if x = 0.0
+ fclass.m p13, p0 = f8, 0x07
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8
+ nop.i 0
+}
+;;
+{ .mfb
+ // load smallest normal to FP reg
+ setf.d fDenoBound = rDenoBound
+ // answer if x is a NaN
+(p12) fma.d.s0 f8 = f8,f1,f0
+ // exit here if x is a NaN
+(p12) br.ret.spnt b0
+}
+;;
+{ .mfb
+ nop.m 0
+ nop.f 0
+ // exit here if x = 0.0
+(p13) br.ret.spnt b0
+}
+;;
+// if we still here then x is denormal or unnormal
+{ .mfi
+ nop.m 0
+ // absolute value of normalized x
+ fmerge.s fNormX = f1, fNormX
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+ // set p14 = 1 if normalized x is greater than or
+ // equal to the smallest normalized value
+ // So, if p14 is set to 1 it means that we deal with
+ // unnormal rather than with "true" denormal
+ fcmp.ge.s1 p14, p0 = fNormX, fDenoBound
+ nop.i 0
+}
+;;
+{ .mfi
+ nop.m 0
+(p14) fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag if x unnormal
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // normalize unnormal input
+(p14) fnorm.s1 f8 = f8
+ // return to the main path
+(p14) br.cond.sptk asin_unnormal_back
+}
+;;
+// if we still here it means that input is "true" denormal
+{ .mfb
+ nop.m 0
+ // final result if x is denormal
+ fma.d.s0 f8 = f8, fXSqr, f8
+ // exit here if x is denormal
+ br.ret.sptk b0
+}
+;;
+
+// here if |x| > 1.0
+// error handler should be called
+.align 32
+asin_abs_gt_1:
+{ .mfi
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = f8,f8
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 61 // error code
+ frcpa.s0 FR_RESULT, p0 = f0,f0
+ // call error handler routine
+ br.cond.sptk __libm_error_region
+}
+;;
+GLOBAL_IEEE754_END(asin)
+libm_alias_double_other (__asin, asin)
+
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // Spill X, Y and the default result to a new frame, then call the error handler
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp - 32 (= new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32; pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (default result) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address (sp+48) after the call
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller with the result in f8
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asinf.S b/sysdeps/ia64/fpu/e_asinf.S
new file mode 100644
index 0000000000..aa58188ec5
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_asinf.S
@@ -0,0 +1,675 @@
+.file "asinf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 06/28/00 Improved speed
+// 06/31/00 Changed register allocation because of some duplicate macros
+// moved nan exit bundle up to gain a cycle.
+// 08/08/00 Improved speed by avoiding SIR flush.
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 08/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 10/17/00 Improved speed of x=0 and x=1 paths, set D flag if x denormal.
+// 03/13/01 Corrected sign of imm1 value in dep instruction.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+
+
+// Description
+//=========================================
+// The asinf function computes the arc sine of x in the range [-pi/2,+pi/2].
+// A domain error occurs for arguments not in the range [-1,+1].
+// asinf(+-0) returns +-0
+// asinf(x) returns a Nan and raises the invalid exception for |x| >1
+
+// The acosf function returns the arc cosine in the range [0, +pi] radians.
+// A domain error occurs for arguments not in the range [-1,+1].
+// acosf(1) returns +0
+// acosf(x) returns a Nan and raises the invalid exception for |x| >1
+
+
+// |x| <= sqrt(2)/2. get Ax and Bx
+
+// poly_p1 = x p1
+// poly_p3 = x2 p4 + p3
+// poly_p1 = x2 (poly_p1) + x = x2(x p1) + x
+// poly_p2 = x2( poly_p3) + p2 = x2(x2 p4 + p3) + p2
+
+// poly_Ax = x5(x2( poly_p3) + p2) + x2(x p1) + x
+// = x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+
+// poly_p7 = x2 p8 + p7
+// poly_p5 = x2 p6 + p5
+
+// poly_p7 = x4 p9 + (poly_p7)
+// poly_p7 = x4 p9 + (x2 p8 + p7)
+// poly_Bx = x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5
+
+// answer1 = x11(x4 (x4 p9 + (x2 p8 + p7)) + x2 p6 + p5) + x5(x2(x2 p4 + p3) + p2) + x2(x p1) + x
+// = x19 p9 + x17 p8 + x15 p7 + x13 p6 + x11 p5 + x9 p4 + x7 p3 + x5 p2 + x3 p1 + x
+
+
+
+// |x| > sqrt(2)/2
+
+// Get z = sqrt(1-x2)
+
+// Get polynomial in t = 1-x2
+
+// t2 = t t
+// t4 = t2 t2
+
+// poly_p4 = t p5 + p4
+// poly_p1 = t p1 + 1
+
+// poly_p6 = t p7 + p6
+// poly_p2 = t p3 + p2
+
+// poly_p8 = t p9 + p8
+
+// poly_p4 = t2 poly_p6 + poly_p4
+// = t2 (t p7 + p6) + (t p5 + p4)
+
+// poly_p2 = t2 poly_p2 + poly_p1
+// = t2 (t p3 + p2) + (t p1 + 1)
+
+// poly_p4 = t4 poly_p8 + poly_p4
+// = t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4))
+
+// P(t) = poly_p2 + t4 poly_p4
+// = t2 (t p3 + p2) + (t p1 + 1) + t4 (t4 (t p9 + p8) + (t2 (t p7 + p6) + (t p5 + p4)))
+// = t3 p3 + t2 p2 + t p1 + 1 + t9 p9 + t8 p8 + t7 p7 + t6 p6 + t5 p5 + t4 p4
+
+
+// answer2 = - sign(x) z P(t) + (sign(x) pi/2)
+//
+
+
+// Assembly macros
+//=========================================
+
+// predicate registers (the code names p7/p8 directly, see History 08/17/00)
+//asinf_pred_LEsqrt2by2 = p7
+//asinf_pred_GTsqrt2by2 = p8
+
+// integer registers (stacked registers obtained by the alloc at asinf entry)
+ASINF_Addr1 = r33 // address of asinf_coeff_1_table
+ASINF_Addr2 = r34 // address of asinf_coeff_2_table
+ASINF_GR_1by2 = r35 // single-precision bit pattern 0x3f000000 (0.5)
+
+ASINF_GR_3by2 = r36 // single-precision bit pattern 0x3fc00000 (1.5)
+ASINF_GR_5by2 = r37 // single-precision bit pattern 0x40200000 (2.5)
+
+GR_SAVE_B0 = r38 // b0 saved across the error-handler call
+GR_SAVE_PFS = r39 // ar.pfs saved across the error-handler call
+GR_SAVE_GP = r40 // gp saved across the error-handler call
+
+GR_Parameter_X = r41 // address of the stacked input argument
+GR_Parameter_Y = r42 // address of the stacked second argument slot
+GR_Parameter_RESULT = r43 // address of the stacked result slot
+GR_Parameter_TAG = r44 // error tag (62 is set for |x| > 1)
+
+// floating point registers
+
+asinf_y = f32 // NOTE(review): not referenced by the code in this file
+asinf_abs_x = f33 // |x|
+asinf_x2 = f34 // NOTE(review): bound again to f48 below, so this f34 binding looks dead
+asinf_sgn_x = f35 // 1.0 carrying the sign of x
+
+asinf_1by2 = f36 // 0.5
+asinf_3by2 = f37 // 1.5
+asinf_5by2 = f38 // 2.5
+asinf_coeff_P3 = f39 // polynomial coefficient P3
+asinf_coeff_P8 = f40 // polynomial coefficient P8
+
+asinf_coeff_P1 = f41 // polynomial coefficient P1
+asinf_coeff_P4 = f42 // polynomial coefficient P4
+asinf_coeff_P5 = f43 // polynomial coefficient P5
+asinf_coeff_P2 = f44 // polynomial coefficient P2
+asinf_coeff_P7 = f45 // polynomial coefficient P7
+
+asinf_coeff_P6 = f46 // polynomial coefficient P6
+asinf_coeff_P9 = f47 // polynomial coefficient P9
+asinf_x2 = f48 // x^2
+asinf_x3 = f49 // x^3
+asinf_x4 = f50 // x^4
+
+asinf_x8 = f51 // x^8
+asinf_x5 = f52 // x^5
+asinf_const_piby2 = f53 // pi/2
+asinf_const_sqrt2by2 = f54 // sqrt(2)/2, boundary between the two evaluation paths
+asinf_x11 = f55 // x^11
+
+asinf_poly_p1 = f56 // x*P1
+asinf_poly_p3 = f57 // x^2*P4 + P3
+asinf_sinf1 = f58 // NOTE(review): not referenced by the code in this file
+asinf_poly_p2 = f59 // x^2*poly_p3 + P2
+asinf_poly_Ax = f60 // x^5*poly_p2 + poly_p1a
+
+asinf_poly_p7 = f61 // x^2*P8 + P7
+asinf_poly_p5 = f62 // x^2*P6 + P5
+asinf_sgnx_t4 = f63 // sign(x)*t^4
+asinf_poly_Bx = f64 // x^4*poly_p7a + poly_p5
+asinf_t = f65 // t = 1 - x^2
+
+asinf_yby2 = f66 // t/2
+asinf_B = f67 // frsqrta approximation of 1/sqrt(t)
+asinf_B2 = f68 // B^2
+asinf_Az = f69 // t*B, first approximation of sqrt(t)
+asinf_dz = f70 // 1/2 - B^2*(t/2)
+
+asinf_Sz = f71 // 3/2 + (5/2)*dz
+asinf_d2z = f72 // dz^2
+asinf_Fz = f73 // dz + dz^2*Sz
+asinf_z = f74 // Az + Az*Fz, refined sqrt(1 - x^2)
+asinf_sgnx_z = f75 // NOTE(review): not referenced by the code in this file
+
+asinf_t2 = f76 // t^2
+asinf_2poly_p4 = f77 // t*P5 + P4
+asinf_2poly_p6 = f78 // t*P7 + P6
+asinf_2poly_p1 = f79 // t*P1 + 1
+asinf_2poly_p2 = f80 // t*P3 + P2
+
+asinf_2poly_p8 = f81 // t*P9 + P8
+asinf_t4 = f82 // t^4
+asinf_Pt = f83 // sign(x)*P(t)
+asinf_sgnx_2poly_p2 = f84 // sign(x)*2poly_p2a
+asinf_sgn_x_piby2 = f85 // sign(x)*pi/2
+
+asinf_poly_p7a = f86 // x^4*P9 + poly_p7
+asinf_2poly_p4a = f87 // t^2*2poly_p6 + 2poly_p4
+asinf_2poly_p4b = f88 // t^4*2poly_p8 + 2poly_p4a
+asinf_2poly_p2a = f89 // t^2*2poly_p2 + 2poly_p1
+asinf_poly_p1a = f90 // x^2*poly_p1 + x
+
+
+
+
+
+// Data tables: IEEE double bit patterns, fetched by asinf with ldfpd/ldfd
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(asinf_coeff_1_table)
+data8 0x3FC5555607DCF816 // P1: coefficient of x^3 (and of t)
+data8 0x3F9CF81AD9BAB2C6 // P4: coefficient of x^9 (and of t^4)
+data8 0x3FC59E0975074DF3 // P7: coefficient of x^15 (and of t^7)
+data8 0xBFA6F4CC2780AA1D // P6: coefficient of x^13 (and of t^6)
+data8 0x3FC2DD45292E93CB // P9: coefficient of x^19 (and of t^9)
+data8 0x3fe6a09e667f3bcd // sqrt(2)/2, boundary between the two paths
+LOCAL_OBJECT_END(asinf_coeff_1_table)
+
+LOCAL_OBJECT_START(asinf_coeff_2_table)
+data8 0x3FA6F108E31EFBA6 // P3: coefficient of x^7 (and of t^3)
+data8 0xBFCA31BF175D82A0 // P8: coefficient of x^17 (and of t^8)
+data8 0x3FA30C0337F6418B // P5: coefficient of x^11 (and of t^5)
+data8 0x3FB332C9266CB1F9 // P2: coefficient of x^5 (and of t^2)
+data8 0x3ff921fb54442d18 // pi/2
+LOCAL_OBJECT_END(asinf_coeff_2_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(asinf)
+
+// Single-precision arc sine: x arrives in f8 and the result is returned in f8.
+// First load the addresses of the two tables, then the coefficients and constants.
+
+{ .mfi
+ alloc r32 = ar.pfs,1,8,4,0 // 1 input, 8 locals, 4 outputs
+ fnma.s1 asinf_t = f8,f8,f1 // t = 1 - x^2
+ dep.z ASINF_GR_1by2 = 0x3f,24,8 // 0x3f000000 = 0.5f
+}
+{ .mfi
+ addl ASINF_Addr1 = @ltoff(asinf_coeff_1_table),gp // linkage-table slot of table 1
+ fma.s1 asinf_x2 = f8,f8,f0 // x^2
+ addl ASINF_Addr2 = @ltoff(asinf_coeff_2_table),gp ;; // linkage-table slot of table 2
+}
+
+
+{ .mfi
+ ld8 ASINF_Addr1 = [ASINF_Addr1] // address of table 1
+ fmerge.s asinf_abs_x = f1,f8 // |x| (sign taken from +1.0)
+ dep ASINF_GR_3by2 = -1,r0,22,8 // 0x3fc00000 = 1.5f
+}
+{ .mlx
+ nop.m 999
+ movl ASINF_GR_5by2 = 0x40200000;; // 2.5f
+}
+
+
+
+{ .mfi
+ setf.s asinf_1by2 = ASINF_GR_1by2 // 0.5
+ fmerge.s asinf_sgn_x = f8,f1 // 1.0 with the sign of x
+ nop.i 999
+}
+{ .mfi
+ ld8 ASINF_Addr2 = [ASINF_Addr2] // address of table 2
+ nop.f 0
+ nop.i 999;;
+}
+
+
+{ .mfi
+ setf.s asinf_5by2 = ASINF_GR_5by2 // 2.5
+ fcmp.lt.s1 p11,p12 = f8,f0 // p11 = (x < 0); NOTE(review): p11/p12 are not read later in this routine
+ nop.i 999;;
+}
+
+{ .mmf
+ ldfpd asinf_coeff_P1,asinf_coeff_P4 = [ASINF_Addr1],16 // P1, P4
+ setf.s asinf_3by2 = ASINF_GR_3by2 // 1.5
+ fclass.m.unc p8,p0 = f8, 0xc3 ;; // p8 = x is a NaN (@qnan | @snan)
+}
+
+
+{ .mfi
+ ldfpd asinf_coeff_P7,asinf_coeff_P6 = [ASINF_Addr1],16 // P7, P6
+ fma.s1 asinf_t2 = asinf_t,asinf_t,f0 // t^2
+ nop.i 999
+}
+{ .mfi
+ ldfpd asinf_coeff_P3,asinf_coeff_P8 = [ASINF_Addr2],16 // P3, P8
+ fma.s1 asinf_x4 = asinf_x2,asinf_x2,f0 // x^4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfpd asinf_coeff_P9,asinf_const_sqrt2by2 = [ASINF_Addr1] // P9, sqrt(2)/2
+ fclass.m.unc p10,p0 = f8, 0x07 // p10 = x is +-0 (@zero)
+ nop.i 999
+}
+{ .mfi
+ ldfpd asinf_coeff_P5,asinf_coeff_P2 = [ASINF_Addr2],16 // P5, P2
+ fma.s1 asinf_x3 = f8,asinf_x2,f0 // x^3
+ nop.i 999;;
+}
+
+
+{ .mfi
+ ldfd asinf_const_piby2 = [ASINF_Addr2] // pi/2
+ frsqrta.s1 asinf_B,p0 = asinf_t // B = initial approximation of 1/sqrt(t)
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p8) fma.s.s0 f8 = f8,f1,f0 // NaN input: result is x * 1.0 in single precision
+(p8) br.ret.spnt b0 ;; // Exit if x=nan
+}
+
+
+{ .mfb
+ nop.m 999
+ fcmp.eq.s1 p6,p0 = asinf_abs_x,f1 // p6 = (|x| == 1)
+(p10) br.ret.spnt b0 ;; // Exit if x=0 (f8 still holds the input zero)
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.gt.s1 p9,p0 = asinf_abs_x,f1 // p9 = (|x| > 1)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_x8 = asinf_x4,asinf_x4,f0 // x^8
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 asinf_t4 = asinf_t2,asinf_t2,f0 // t^4
+(p6) br.cond.spnt ASINF_ABS_ONE ;; // Branch if |x|=1
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_x5 = asinf_x2,asinf_x3,f0 // x^5
+ nop.i 999
+}
+{ .mfb
+(p9) mov GR_Parameter_TAG = 62 // error tag passed on to the error handler
+ fma.s1 asinf_yby2 = asinf_t,asinf_1by2,f0 // t/2
+(p9) br.cond.spnt __libm_error_region ;; // Branch if |x|>1
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_Az = asinf_t,asinf_B,f0 // Az = t*B, first approximation of sqrt(t)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_B2 = asinf_B,asinf_B,f0 // B^2
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p1 = f8,asinf_coeff_P1,f0 // x*P1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p1 = asinf_coeff_P1,asinf_t,f1 // t*P1 + 1
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p3 = asinf_coeff_P4,asinf_x2,asinf_coeff_P3 // x^2*P4 + P3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p6 = asinf_coeff_P7,asinf_t,asinf_coeff_P6 // t*P7 + P6
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p7 = asinf_x2,asinf_coeff_P8,asinf_coeff_P7 // x^2*P8 + P7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p2 = asinf_coeff_P3,asinf_t,asinf_coeff_P2 // t*P3 + P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p5 = asinf_x2,asinf_coeff_P6,asinf_coeff_P5 // x^2*P6 + P5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p4 = asinf_coeff_P5,asinf_t,asinf_coeff_P4 // t*P5 + P4
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.d.s1 asinf_x11 = asinf_x8,asinf_x3,f0 // x^11
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 asinf_dz = asinf_B2,asinf_yby2,asinf_1by2 // dz = 1/2 - B^2*(t/2)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p1a = asinf_x2,asinf_poly_p1,f8 // x^3*P1 + x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p8 = asinf_coeff_P9,asinf_t,asinf_coeff_P8 // t*P9 + P8
+ nop.i 999;;
+}
+
+
+// Determine the region in which |x| lies: p7 = (|x| <= sqrt(2)/2), p8 = otherwise
+
+{ .mfi
+ nop.m 999
+ fcmp.le.s1 p7,p8 = asinf_abs_x,asinf_const_sqrt2by2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p2 = asinf_x2,asinf_poly_p3,asinf_coeff_P2 // x^2*poly_p3 + P2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_poly_p7a = asinf_x4,asinf_coeff_P9,asinf_poly_p7 // x^4*P9 + poly_p7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 asinf_2poly_p2a = asinf_2poly_p2,asinf_t2,asinf_2poly_p1 // t^2*(t*P3 + P2) + (t*P1 + 1)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_sgnx_t4 = asinf_sgn_x,asinf_t4,f0 // sign(x)*t^4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_2poly_p4a = asinf_2poly_p6,asinf_t2,asinf_2poly_p4 // t^2*(t*P7 + P6) + (t*P5 + P4)
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_Sz = asinf_5by2,asinf_dz,asinf_3by2 // Sz = 3/2 + (5/2)*dz
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_d2z = asinf_dz,asinf_dz,f0 // dz^2
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_sgn_x_piby2 = asinf_sgn_x,asinf_const_piby2,f0 // sign(x)*pi/2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.d.s1 asinf_poly_Ax = asinf_x5,asinf_poly_p2,asinf_poly_p1a // Ax = x^5*poly_p2 + poly_p1a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p7) fma.d.s1 asinf_poly_Bx = asinf_x4,asinf_poly_p7a,asinf_poly_p5 // Bx = x^4*poly_p7a + poly_p5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_sgnx_2poly_p2 = asinf_sgn_x,asinf_2poly_p2a,f0 // sign(x)*2poly_p2a
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Only purpose is to set D if x denormal
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_2poly_p4b = asinf_2poly_p8,asinf_t4,asinf_2poly_p4a // t^4*(t*P9 + P8) + 2poly_p4a
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 asinf_Fz = asinf_d2z,asinf_Sz,asinf_dz // Fz = dz + dz^2*Sz
+ nop.i 999;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 asinf_Pt = asinf_2poly_p4b,asinf_sgnx_t4,asinf_sgnx_2poly_p2 // sign(x)*P(t)
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+(p8) fma.d.s1 asinf_z = asinf_Az,asinf_Fz,asinf_Az // z = Az + Az*Fz, refined sqrt(1 - x^2)
+ nop.i 999;;
+}
+
+.pred.rel "mutex",p8,p7 //asinf_pred_GTsqrt2by2,asinf_pred_LEsqrt2by2
+{ .mfi
+ nop.m 999
+(p8) fnma.s.s0 f8 = asinf_z,asinf_Pt,asinf_sgn_x_piby2 // answer2 = sign(x)*pi/2 - z*sign(x)*P(t)
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s.s0 f8 = asinf_x11,asinf_poly_Bx,asinf_poly_Ax // answer1 = x^11*Bx + Ax
+ br.ret.sptk b0 ;;
+}
+
+ASINF_ABS_ONE:
+// Here for short exit if |x|=1
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = asinf_sgn_x,asinf_const_piby2,f0 // result = sign(x)*pi/2
+ br.ret.sptk b0
+}
+;;
+
+GLOBAL_IEEE754_END(asinf)
+libm_alias_float_other (__asin, asin)
+
+// Stack operations when calling error support.
+// (1) (2)
+// sp -> + psp -> +
+// | |
+// | | <- GR_Y
+// | |
+// | <-GR_Y Y2->|
+// | |
+// | | <- GR_X
+// | |
+// sp-64 -> + sp -> +
+// save ar.pfs save b0
+// save gp
+
+
+// Stack operations when calling error support.
+// (3) (call) (4)
+// psp -> + sp -> +
+// | |
+// R3 ->| <- GR_RESULT | -> f8
+// | |
+// Y2 ->| <- GR_Y |
+// | |
+// X1 ->| |
+// | |
+// sp -> + +
+// restore gp
+// restore ar.pfs
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // Spill x, a second-argument slot and the default result, then call the error handler
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp - 32 (= new sp + 32)
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // Store f1 (1.0) in the Parameter 2 slot; pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mfi
+ nop.m 0
+ frcpa.s0 f9,p0 = f0,f0 // f9 = 0/0: NaN default result, raises the invalid flag
+ nop.i 0
+};;
+
+{ .mib
+ stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f9 // Store Parameter 3 (default result) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address (see History 08/15/00)
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller with the result in f8
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_asinl.S b/sysdeps/ia64/fpu/e_asinl.S
new file mode 100644
index 0000000000..fce4bafa41
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_asinl.S
@@ -0,0 +1,2523 @@
+.file "asinl.s"
+
+
+// Copyright (c) 2001 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 08/28/01 New version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// long double asinl(long double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// For |s| in [2^{-4}, sqrt(2)/2]:
+// Let t= 2^k*1.b1 b2..b6 1, where s= 2^k*1.b1 b2.. b52
+// asin(s)= asin(t)+asin(r), where r= s*sqrt(1-t^2)-t*sqrt(1-s^2), i.e.
+// r= (s-t)*sqrt(1-t^2)-t*sqrt(1-t^2)*(sqrt((1-s^2)/(1-t^2))-1)
+// asin(r)-r evaluated as 9-degree polynomial (c3*r^3+c5*r^5+c7*r^7+c9*r^9)
+// The 64-bit significands of sqrt(1-t^2), 1/(1-t^2) are read from the table,
+// along with the high and low parts of asin(t) (stored as two double precision
+// values)
+//
+// |s| in (sqrt(2)/2, sqrt(255/256)):
+// Let t= 2^k*1.b1 b2..b6 1, where (1-s^2)*frsqrta(1-s^2)= 2^k*1.b1 b2..b6..
+// asin(|s|)= pi/2-asin(t)+asin(r), r= s*t-sqrt(1-s^2)*sqrt(1-t^2)
+// To minimize accumulated errors, r is computed as
+// r= (t*s)_s-t^2*y*z+z*y*(t^2-1+s^2)_s+z*y*(1-s^2)_s*x+z'*y*(1-s^2)*PS29+
+// +(t*s-(t*s)_s)+z*y*((t^2-1-(t^2-1+s^2)_s)+s^2)+z*y*(1-s^2-(1-s^2)_s)+
+// +ez*z'*y*(1-s^2)*(1-x),
+// where y= frsqrta(1-s^2), z= (sqrt(1-t^2))_s (rounded to 24 significant bits)
+// z'= sqrt(1-t^2), x= ((1-s^2)*y^2-1)/2
+//
+// |s|<2^{-4}: evaluate as 17-degree polynomial
+// (or simply return s, if|s|<2^{-64})
+//
+// |s| in [sqrt(255/256), 1): asin(|s|)= pi/2-asin(sqrt(1-s^2))
+// use 17-degree polynomial for asin(sqrt(1-s^2)),
+// 9-degree polynomial to evaluate sqrt(1-s^2)
+// High order term is (pi/2)_high-(y*(1-s^2))_high
+//
+
+
+
+// Registers used
+//==============================================================
+// f6-f15, f32-f115 (F_* names defined below)
+// r2-r3, r15, r20-r29 (R_* names defined below), r33-r40 (GR_* names defined below)
+// p6, p7, p8, p11, p12
+//
+
+// Register names not referenced in the visible start of asinl; presumably for an error-support call later in the file (TODO confirm)
+ GR_SAVE_B0= r33 // presumably holds the saved b0 -- confirm
+ GR_SAVE_PFS= r34 // presumably holds the saved ar.pfs -- confirm
+ GR_SAVE_GP= r35 // This reg. can safely be used
+ GR_SAVE_SP= r36
+
+ GR_Parameter_X= r37
+ GR_Parameter_Y= r38
+ GR_Parameter_RESULT= r39
+ GR_Parameter_TAG= r40
+
+ FR_X= f10 // same register as F_INV_1T2
+ FR_Y= f1
+ FR_RESULT= f8 // f8 also carries the argument s on entry to asinl
+
+
+
+RODATA
+
+.align 16
+
+
+
+LOCAL_OBJECT_START(T_table)
+
+// Each 32-byte entry stores the 64-bit significand of 1/(1-t^2), the 64-bit significand
+// of sqrt(1-t^2), asin(t)_high (double precision) and asin(t)_low (double precision)
+
+data8 0x80828692b71c4391, 0xff7ddcec2d87e879
+data8 0x3fb022bc0ae531a0, 0x3c9f599c7bb42af6
+data8 0x80869f0163d0b082, 0xff79cad2247914d3
+data8 0x3fb062dd26afc320, 0x3ca4eff21bd49c5c
+data8 0x808ac7d5a8690705, 0xff75a89ed6b626b9
+data8 0x3fb0a2ff4a1821e0, 0x3cb7e33b58f164cc
+data8 0x808f0112ad8ad2e0, 0xff7176517c2cc0cb
+data8 0x3fb0e32279319d80, 0x3caee31546582c43
+data8 0x80934abba8a1da0a, 0xff6d33e949b1ed31
+data8 0x3fb12346b8101da0, 0x3cb8bfe463d087cd
+data8 0x8097a4d3dbe63d8f, 0xff68e16571015c63
+data8 0x3fb1636c0ac824e0, 0x3c8870a7c5a3556f
+data8 0x809c0f5e9662b3dd, 0xff647ec520bca0f0
+data8 0x3fb1a392756ed280, 0x3c964f1a927461ae
+data8 0x80a08a5f33fadc66, 0xff600c07846a6830
+data8 0x3fb1e3b9fc19e580, 0x3c69eb3576d56332
+data8 0x80a515d91d71acd4, 0xff5b892bc475affa
+data8 0x3fb223e2a2dfbe80, 0x3c6a4e19fd972fb6
+data8 0x80a9b1cfc86ff7cd, 0xff56f631062cf93d
+data8 0x3fb2640c6dd76260, 0x3c62041160e0849e
+data8 0x80ae5e46b78b0d68, 0xff5253166bc17794
+data8 0x3fb2a43761187c80, 0x3cac61651af678c0
+data8 0x80b31b417a4b756b, 0xff4d9fdb14463dc8
+data8 0x3fb2e46380bb6160, 0x3cb06ef23eeba7a1
+data8 0x80b7e8c3ad33c369, 0xff48dc7e1baf6738
+data8 0x3fb32490d0d910c0, 0x3caa05f480b300d5
+data8 0x80bcc6d0f9c784d6, 0xff4408fe9ad13e37
+data8 0x3fb364bf558b3820, 0x3cb01e7e403aaab9
+data8 0x80c1b56d1692492d, 0xff3f255ba75f5f4e
+data8 0x3fb3a4ef12ec3540, 0x3cb4fe8fcdf5f5f1
+data8 0x80c6b49bc72ec446, 0xff3a319453ebd961
+data8 0x3fb3e5200d171880, 0x3caf2dc089b2b7e2
+data8 0x80cbc460dc4e0ae8, 0xff352da7afe64ac6
+data8 0x3fb425524827a720, 0x3cb75a855e7c6053
+data8 0x80d0e4c033bee9c4, 0xff301994c79afb32
+data8 0x3fb46585c83a5e00, 0x3cb3264981c019ab
+data8 0x80d615bdb87556db, 0xff2af55aa431f291
+data8 0x3fb4a5ba916c73c0, 0x3c994251d94427b5
+data8 0x80db575d6291fd8a, 0xff25c0f84bae0cb9
+data8 0x3fb4e5f0a7dbdb20, 0x3cbee2fcc4c786cb
+data8 0x80e0a9a33769e535, 0xff207c6cc0ec09fd
+data8 0x3fb526280fa74620, 0x3c940656e5549b91
+data8 0x80e60c93498e32cd, 0xff1b27b703a19c98
+data8 0x3fb56660ccee2740, 0x3ca7082374d7b2cd
+data8 0x80eb8031b8d4052d, 0xff15c2d6105c72f8
+data8 0x3fb5a69ae3d0b520, 0x3c7c4d46e09ac68a
+data8 0x80f10482b25c6c8a, 0xff104dc8e0813ed4
+data8 0x3fb5e6d6586fec20, 0x3c9aa84ffd9b4958
+data8 0x80f6998a709c7cfb, 0xff0ac88e6a4ab926
+data8 0x3fb627132eed9140, 0x3cbced2cbbbe7d16
+data8 0x80fc3f4d3b657c44, 0xff053325a0c8a2ec
+data8 0x3fb667516b6c34c0, 0x3c6489c5fc68595a
+data8 0x8101f5cf67ed2af8, 0xfeff8d8d73dec2bb
+data8 0x3fb6a791120f33a0, 0x3cbe12acf159dfad
+data8 0x8107bd1558d6291f, 0xfef9d7c4d043df29
+data8 0x3fb6e7d226fabba0, 0x3ca386d099cd0dc7
+data8 0x810d95237e38766a, 0xfef411ca9f80b5f7
+data8 0x3fb72814ae53cc20, 0x3cb9f35731e71dd6
+data8 0x81137dfe55aa0e29, 0xfeee3b9dc7eef009
+data8 0x3fb76858ac403a00, 0x3c74df3dd959141a
+data8 0x811977aa6a479f0f, 0xfee8553d2cb8122c
+data8 0x3fb7a89e24e6b0e0, 0x3ca6034406ee42bc
+data8 0x811f822c54bd5ef8, 0xfee25ea7add46a91
+data8 0x3fb7e8e51c6eb6a0, 0x3cb82f8f78e68ed7
+data8 0x81259d88bb4ffac1, 0xfedc57dc2809fb1d
+data8 0x3fb8292d9700ad60, 0x3cbebb73c0e653f9
+data8 0x812bc9c451e5a257, 0xfed640d974eb6068
+data8 0x3fb8697798c5d620, 0x3ca2feee76a9701b
+data8 0x813206e3da0f3124, 0xfed0199e6ad6b585
+data8 0x3fb8a9c325e852e0, 0x3cb9e88f2f4d0efe
+data8 0x813854ec231172f9, 0xfec9e229dcf4747d
+data8 0x3fb8ea1042932a00, 0x3ca5ff40d81f66fd
+data8 0x813eb3e209ee858f, 0xfec39a7a9b36538b
+data8 0x3fb92a5ef2f247c0, 0x3cb5e3bece4d6b07
+data8 0x814523ca796f56ce, 0xfebd428f72561efe
+data8 0x3fb96aaf3b3281a0, 0x3cb7b9e499436d7c
+data8 0x814ba4aa6a2d3ff9, 0xfeb6da672bd48fe4
+data8 0x3fb9ab011f819860, 0x3cb9168143cc1a7f
+data8 0x81523686e29bbdd7, 0xfeb062008df81f50
+data8 0x3fb9eb54a40e3ac0, 0x3cb6e544197eb1e1
+data8 0x8158d964f7124614, 0xfea9d95a5bcbd65a
+data8 0x3fba2ba9cd080800, 0x3ca9a717be8f7446
+data8 0x815f8d49c9d639e4, 0xfea34073551e1ac8
+data8 0x3fba6c009e9f9260, 0x3c741e989a60938a
+data8 0x8166523a8b24f626, 0xfe9c974a367f785c
+data8 0x3fbaac591d0661a0, 0x3cb2c1290107e57d
+data8 0x816d283c793e0114, 0xfe95ddddb94166cb
+data8 0x3fbaecb34c6ef600, 0x3c9c7d5fbaec405d
+data8 0x81740f54e06d55bd, 0xfe8f142c93750c50
+data8 0x3fbb2d0f310cca00, 0x3cbc09479a9cbcfb
+data8 0x817b07891b15cd5e, 0xfe883a3577e9fceb
+data8 0x3fbb6d6ccf1455e0, 0x3cb9450bff4ee307
+data8 0x818210de91bba6c8, 0xfe814ff7162cf62f
+data8 0x3fbbadcc2abb1180, 0x3c9227fda12a8d24
+data8 0x81892b5abb0f2bf9, 0xfe7a55701a8697b1
+data8 0x3fbbee2d48377700, 0x3cb6fad72acfe356
+data8 0x819057031bf7760e, 0xfe734a9f2dfa1810
+data8 0x3fbc2e902bc10600, 0x3cb4465b588d16ad
+data8 0x819793dd479d4fbe, 0xfe6c2f82f643f68b
+data8 0x3fbc6ef4d9904580, 0x3c8b9ac54823960d
+data8 0x819ee1eedf76367a, 0xfe65041a15d8a92c
+data8 0x3fbcaf5b55dec6a0, 0x3ca2b8d28a954db2
+data8 0x81a6413d934f7a66, 0xfe5dc8632be3477f
+data8 0x3fbcefc3a4e727a0, 0x3c9380da83713ab4
+data8 0x81adb1cf21597d4b, 0xfe567c5cd44431d5
+data8 0x3fbd302dcae51600, 0x3ca995b83421756a
+data8 0x81b533a9563310b8, 0xfe4f2005a78fb50f
+data8 0x3fbd7099cc155180, 0x3caefa2f7a817d5f
+data8 0x81bcc6d20cf4f373, 0xfe47b35c3b0caaeb
+data8 0x3fbdb107acb5ae80, 0x3cb455fc372dd026
+data8 0x81c46b4f2f3d6e68, 0xfe40365f20b316d6
+data8 0x3fbdf177710518c0, 0x3cbee3dcc5b01434
+data8 0x81cc2126b53c1144, 0xfe38a90ce72abf36
+data8 0x3fbe31e91d439620, 0x3cb3e131c950aebd
+data8 0x81d3e85ea5bd8ee2, 0xfe310b6419c9c33a
+data8 0x3fbe725cb5b24900, 0x3c01d3fac6029027
+data8 0x81dbc0fd1637b9c1, 0xfe295d6340932d15
+data8 0x3fbeb2d23e937300, 0x3c6304cc44aeedd1
+data8 0x81e3ab082ad5a0a4, 0xfe219f08e03580b3
+data8 0x3fbef349bc2a77e0, 0x3cac1d2d6abe9c72
+data8 0x81eba6861683cb97, 0xfe19d0537a0946e2
+data8 0x3fbf33c332bbe020, 0x3ca0909dba4e96ca
+data8 0x81f3b37d1afc9979, 0xfe11f1418c0f94e2
+data8 0x3fbf743ea68d5b60, 0x3c937fc12a2a779a
+data8 0x81fbd1f388d4be45, 0xfe0a01d190f09063
+data8 0x3fbfb4bc1be5c340, 0x3cbf51a504b55813
+data8 0x820401efbf87e248, 0xfe020201fff9efea
+data8 0x3fbff53b970d1e80, 0x3ca625444b260078
+data8 0x82106ad2ffdca049, 0xfdf5e3940a49135e
+data8 0x3fc02aff52065460, 0x3c9125d113e22a57
+data8 0x8221343d6ea1d3e2, 0xfde581a45429b0a0
+data8 0x3fc06b84f8e03220, 0x3caccf362295894b
+data8 0x82324434adbf99c2, 0xfdd4de1a001fb775
+data8 0x3fc0ac0ed1fe7240, 0x3cc22f676096b0af
+data8 0x82439aee8d0c7747, 0xfdc3f8e8269d1f03
+data8 0x3fc0ec9cee9e4820, 0x3cca147e2886a628
+data8 0x825538a1d0fcb2f0, 0xfdb2d201a9b1ba66
+data8 0x3fc12d2f6006f0a0, 0x3cc72b36633bc2d4
+data8 0x82671d86345c5cee, 0xfda1695934d723e7
+data8 0x3fc16dc63789de60, 0x3cb11f9c47c7b83f
+data8 0x827949d46a121770, 0xfd8fbee13cbbb823
+data8 0x3fc1ae618682e620, 0x3cce1b59020cef8e
+data8 0x828bbdc61eeab9ba, 0xfd7dd28bff0c9f34
+data8 0x3fc1ef015e586c40, 0x3cafec043e0225ee
+data8 0x829e7995fb6de9e1, 0xfd6ba44b823ee1ca
+data8 0x3fc22fa5d07b90c0, 0x3cba905409caf8e3
+data8 0x82b17d7fa5bbc982, 0xfd5934119557883a
+data8 0x3fc2704eee685da0, 0x3cb5ef21838a823e
+data8 0x82c4c9bfc373d276, 0xfd4681cfcfb2c161
+data8 0x3fc2b0fcc9a5f3e0, 0x3ccc7952c5e0e312
+data8 0x82d85e93fba50136, 0xfd338d7790ca0f41
+data8 0x3fc2f1af73c6ba00, 0x3cbecf5f977d1ca9
+data8 0x82ec3c3af8c76b32, 0xfd2056f9fff97727
+data8 0x3fc33266fe6889a0, 0x3c9d329c022ebdb5
+data8 0x830062f46abf6022, 0xfd0cde480c43b327
+data8 0x3fc373237b34de60, 0x3cc95806d4928adb
+data8 0x8314d30108ea35f0, 0xfcf923526c1562b2
+data8 0x3fc3b3e4fbe10520, 0x3cbc299fe7223d54
+data8 0x83298ca29434df97, 0xfce526099d0737ed
+data8 0x3fc3f4ab922e4a60, 0x3cb59d8bb8fdbccc
+data8 0x833e901bd93c7009, 0xfcd0e65de39f1f7c
+data8 0x3fc435774fea2a60, 0x3c9ec18b43340914
+data8 0x8353ddb0b278aad8, 0xfcbc643f4b106055
+data8 0x3fc4764846ee80a0, 0x3cb90402efd87ed6
+data8 0x836975a60a70c52e, 0xfca79f9da4fab13a
+data8 0x3fc4b71e8921b860, 0xbc58f23449ed6365
+data8 0x837f5841ddfa7a46, 0xfc92986889284148
+data8 0x3fc4f7fa2876fca0, 0xbc6294812bf43acd
+data8 0x839585cb3e839773, 0xfc7d4e8f554ab12f
+data8 0x3fc538db36ee6960, 0x3cb910b773d4c578
+data8 0x83abfe8a5466246f, 0xfc67c2012cb6fa68
+data8 0x3fc579c1c6953cc0, 0x3cc5ede909fc47fc
+data8 0x83c2c2c861474d91, 0xfc51f2acf82041d5
+data8 0x3fc5baade9860880, 0x3cac63cdfc3588e5
+data8 0x83d9d2cfc2813637, 0xfc3be08165519325
+data8 0x3fc5fb9fb1e8e3a0, 0x3cbf7c8466578c29
+data8 0x83f12eebf397daac, 0xfc258b6ce6e6822f
+data8 0x3fc63c9731f39d40, 0x3cb6d2a7ffca3e9e
+data8 0x8408d76990b9296e, 0xfc0ef35db402af94
+data8 0x3fc67d947be9eec0, 0x3cb1980da09e6566
+data8 0x8420cc9659487cd7, 0xfbf81841c8082dc4
+data8 0x3fc6be97a21daf00, 0x3cc2ac8330e59aa5
+data8 0x84390ec132759ecb, 0xfbe0fa06e24cc390
+data8 0x3fc6ffa0b6ef05e0, 0x3ccc1a030fee56c4
+data8 0x84519e3a29df811a, 0xfbc9989a85ce0954
+data8 0x3fc740afcccca000, 0x3cc19692a5301ca6
+data8 0x846a7b527842d61b, 0xfbb1f3e9f8e45dc4
+data8 0x3fc781c4f633e2c0, 0x3cc0e98f3868a508
+data8 0x8483a65c8434b5f0, 0xfb9a0be244f4af45
+data8 0x3fc7c2e045b12140, 0x3cb2a8d309754420
+data8 0x849d1fabe4e97dd7, 0xfb81e070362116d1
+data8 0x3fc80401cddfd120, 0x3ca7a44544aa4ce6
+data8 0x84b6e795650817ea, 0xfb6971805af8411e
+data8 0x3fc84529a16ac020, 0x3c9e3b709c7d6f94
+data8 0x84d0fe6f0589da92, 0xfb50beff0423a2f5
+data8 0x3fc88657d30c49e0, 0x3cc60d65a7f0a278
+data8 0x84eb649000a73014, 0xfb37c8d84414755c
+data8 0x3fc8c78c758e8e80, 0x3cc94b2ee984c2b7
+data8 0x85061a50ccd13781, 0xfb1e8ef7eeaf764b
+data8 0x3fc908c79bcba900, 0x3cc8540ae794a2fe
+data8 0x8521200b1fb8916e, 0xfb05114998f76a83
+data8 0x3fc94a0958ade6c0, 0x3ca127f49839fa9c
+data8 0x853c7619f1618bf6, 0xfaeb4fb898b65d19
+data8 0x3fc98b51bf2ffee0, 0x3c8c9ba7a803909a
+data8 0x85581cd97f45e274, 0xfad14a3004259931
+data8 0x3fc9cca0e25d4ac0, 0x3cba458e91d3bf54
+data8 0x857414a74f8446b4, 0xfab7009ab1945a54
+data8 0x3fca0df6d551fe80, 0x3cc78ea1d329d2b2
+data8 0x85905de2341dea46, 0xfa9c72e3370d2fbc
+data8 0x3fca4f53ab3b6200, 0x3ccf60dca86d57ef
+data8 0x85acf8ea4e423ff8, 0xfa81a0f3e9fa0ee9
+data8 0x3fca90b777580aa0, 0x3ca4c4e2ec8a867e
+data8 0x85c9e62111a92e7d, 0xfa668ab6dec711b1
+data8 0x3fcad2224cf814e0, 0x3c303de5980d071c
+data8 0x85e725e947fbee97, 0xfa4b3015e883dbfe
+data8 0x3fcb13943f7d5f80, 0x3cc29d4eefa5cb1e
+data8 0x8604b8a7144cd054, 0xfa2f90fa9883a543
+data8 0x3fcb550d625bc6a0, 0x3c9e01a746152daf
+data8 0x86229ebff69e2415, 0xfa13ad4e3dfbe1c1
+data8 0x3fcb968dc9195ea0, 0x3ccc091bd73ae518
+data8 0x8640d89acf78858c, 0xf9f784f9e5a1877b
+data8 0x3fcbd815874eb160, 0x3cb5f4b89875e187
+data8 0x865f669fe390c7f5, 0xf9db17e65944eacf
+data8 0x3fcc19a4b0a6f9c0, 0x3cc5c0bc2b0bbf14
+data8 0x867e4938df7dc45f, 0xf9be65fc1f6c2e6e
+data8 0x3fcc5b3b58e061e0, 0x3cc1ca70df8f57e7
+data8 0x869d80d0db7e4c0c, 0xf9a16f237aec427a
+data8 0x3fcc9cd993cc4040, 0x3cbae93acc85eccf
+data8 0x86bd0dd45f4f8265, 0xf98433446a806e70
+data8 0x3fccde7f754f5660, 0x3cb22f70e64568d0
+data8 0x86dcf0b16613e37a, 0xf966b246a8606170
+data8 0x3fcd202d11620fa0, 0x3c962030e5d4c849
+data8 0x86fd29d7624b3d5d, 0xf948ec11a9d4c45b
+data8 0x3fcd61e27c10c0a0, 0x3cc7083c91d59217
+data8 0x871db9b741dbe44a, 0xf92ae08c9eca4941
+data8 0x3fcda39fc97be7c0, 0x3cc9258579e57211
+data8 0x873ea0c3722d6af2, 0xf90c8f9e71633363
+data8 0x3fcde5650dd86d60, 0x3ca4755a9ea582a9
+data8 0x875fdf6fe45529e8, 0xf8edf92dc5875319
+data8 0x3fce27325d6fe520, 0x3cbc1e2b6c1954f9
+data8 0x878176321154e2bc, 0xf8cf1d20f87270b8
+data8 0x3fce6907cca0d060, 0x3cb6ca4804750830
+data8 0x87a36580fe6bccf5, 0xf8affb5e20412199
+data8 0x3fceaae56fdee040, 0x3cad6b310d6fd46c
+data8 0x87c5add5417a5cb9, 0xf89093cb0b7c0233
+data8 0x3fceeccb5bb33900, 0x3cc16e99cedadb20
+data8 0x87e84fa9057914ca, 0xf870e64d40a15036
+data8 0x3fcf2eb9a4bcb600, 0x3cc75ee47c8b09e9
+data8 0x880b4b780f02b709, 0xf850f2c9fdacdf78
+data8 0x3fcf70b05fb02e20, 0x3cad6350d379f41a
+data8 0x882ea1bfc0f228ac, 0xf830b926379e6465
+data8 0x3fcfb2afa158b8a0, 0x3cce0ccd9f829985
+data8 0x885252ff21146108, 0xf810394699fe0e8e
+data8 0x3fcff4b77e97f3e0, 0x3c9b30faa7a4c703
+data8 0x88765fb6dceebbb3, 0xf7ef730f865f6df0
+data8 0x3fd01b6406332540, 0x3cdc5772c9e0b9bd
+data8 0x88ad1f69be2cc730, 0xf7bdc59bc9cfbd97
+data8 0x3fd04cf8ad203480, 0x3caeef44fe21a74a
+data8 0x88f763f70ae2245e, 0xf77a91c868a9c54e
+data8 0x3fd08f23ce0162a0, 0x3cd6290ab3fe5889
+data8 0x89431fc7bc0c2910, 0xf73642973c91298e
+data8 0x3fd0d1610f0c1ec0, 0x3cc67401a01f08cf
+data8 0x8990573407c7738e, 0xf6f0d71d1d7a2dd6
+data8 0x3fd113b0c65d88c0, 0x3cc7aa4020fe546f
+data8 0x89df0eb108594653, 0xf6aa4e6a05cfdef2
+data8 0x3fd156134ada6fe0, 0x3cc87369da09600c
+data8 0x8a2f4ad16e0ed78a, 0xf662a78900c35249
+data8 0x3fd19888f43427a0, 0x3cc62b220f38e49c
+data8 0x8a811046373e0819, 0xf619e180181d97cc
+data8 0x3fd1db121aed7720, 0x3ca3ede7490b52f4
+data8 0x8ad463df6ea0fa2c, 0xf5cffb504190f9a2
+data8 0x3fd21daf185fa360, 0x3caafad98c1d6c1b
+data8 0x8b294a8cf0488daf, 0xf584f3f54b8604e6
+data8 0x3fd2606046bf95a0, 0x3cdb2d704eeb08fa
+data8 0x8b7fc95f35647757, 0xf538ca65c960b582
+data8 0x3fd2a32601231ec0, 0x3cc661619fa2f126
+data8 0x8bd7e588272276f8, 0xf4eb7d92ff39fccb
+data8 0x3fd2e600a3865760, 0x3c8a2a36a99aca4a
+data8 0x8c31a45bf8e9255e, 0xf49d0c68cd09b689
+data8 0x3fd328f08ad12000, 0x3cb9efaf1d7ab552
+data8 0x8c8d0b520a35eb18, 0xf44d75cd993cfad2
+data8 0x3fd36bf614dcc040, 0x3ccacbb590bef70d
+data8 0x8cea2005d068f23d, 0xf3fcb8a23ab4942b
+data8 0x3fd3af11a079a6c0, 0x3cd9775872cf037d
+data8 0x8d48e837c8cd5027, 0xf3aad3c1e2273908
+data8 0x3fd3f2438d754b40, 0x3ca03304f667109a
+data8 0x8da969ce732f3ac7, 0xf357c60202e2fd7e
+data8 0x3fd4358c3ca032e0, 0x3caecf2504ff1a9d
+data8 0x8e0baad75555e361, 0xf3038e323ae9463a
+data8 0x3fd478ec0fd419c0, 0x3cc64bdc3d703971
+data8 0x8e6fb18807ba877e, 0xf2ae2b1c3a6057f7
+data8 0x3fd4bc6369fa40e0, 0x3cbb7122ec245cf2
+data8 0x8ed5843f4bda74d5, 0xf2579b83aa556f0c
+data8 0x3fd4fff2af11e2c0, 0x3c9cfa2dc792d394
+data8 0x8f3d29862c861fef, 0xf1ffde2612ca1909
+data8 0x3fd5439a4436d000, 0x3cc38d46d310526b
+data8 0x8fa6a81128940b2d, 0xf1a6f1bac0075669
+data8 0x3fd5875a8fa83520, 0x3cd8bf59b8153f8a
+data8 0x901206c1686317a6, 0xf14cd4f2a730d480
+data8 0x3fd5cb33f8cf8ac0, 0x3c9502b5c4d0e431
+data8 0x907f4ca5fe9cf739, 0xf0f186784a125726
+data8 0x3fd60f26e847b120, 0x3cc8a1a5e0acaa33
+data8 0x90ee80fd34aeda5e, 0xf09504ef9a212f18
+data8 0x3fd65333c7e43aa0, 0x3cae5b029cb1f26e
+data8 0x915fab35e37421c6, 0xf0374ef5daab5c45
+data8 0x3fd6975b02b8e360, 0x3cd5aa1c280c45e6
+data8 0x91d2d2f0d894d73c, 0xefd86321822dbb51
+data8 0x3fd6db9d05213b20, 0x3cbecf2c093ccd8b
+data8 0x9248000249200009, 0xef7840021aca5a72
+data8 0x3fd71ffa3cc87fc0, 0x3cb8d273f08d00d9
+data8 0x92bf3a7351f081d2, 0xef16e42021d7cbd5
+data8 0x3fd7647318b1ad20, 0x3cbce099d79cdc46
+data8 0x93388a8386725713, 0xeeb44dfce6820283
+data8 0x3fd7a908093fc1e0, 0x3ccb033ec17a30d9
+data8 0x93b3f8aa8e653812, 0xee507c126774fa45
+data8 0x3fd7edb9803e3c20, 0x3cc10aedb48671eb
+data8 0x94318d99d341ade4, 0xedeb6cd32f891afb
+data8 0x3fd83287f0e9cf80, 0x3c994c0c1505cd2a
+data8 0x94b1523e3dedc630, 0xed851eaa3168f43c
+data8 0x3fd87773cff956e0, 0x3cda3b7bce6a6b16
+data8 0x95334fc20577563f, 0xed1d8ffaa2279669
+data8 0x3fd8bc7d93a70440, 0x3cd4922edc792ce2
+data8 0x95b78f8e8f92f274, 0xecb4bf1fd2be72da
+data8 0x3fd901a5b3b9cf40, 0x3cd3fea1b00f9d0d
+data8 0x963e1b4e63a87c3f, 0xec4aaa6d08694cc1
+data8 0x3fd946eca98f2700, 0x3cdba4032d968ff1
+data8 0x96c6fcef314074fc, 0xebdf502d53d65fea
+data8 0x3fd98c52f024e800, 0x3cbe7be1ab8c95c9
+data8 0x97523ea3eab028b2, 0xeb72aea36720793e
+data8 0x3fd9d1d904239860, 0x3cd72d08a6a22b70
+data8 0x97dfeae6f4ee4a9a, 0xeb04c4096a884e94
+data8 0x3fda177f63e8ef00, 0x3cd818c3c1ebfac7
+data8 0x98700c7c6d85d119, 0xea958e90cfe1efd7
+data8 0x3fda5d468f92a540, 0x3cdf45fbfaa080fe
+data8 0x9902ae7487a9caa1, 0xea250c6224aab21a
+data8 0x3fdaa32f090998e0, 0x3cd715a9353cede4
+data8 0x9997dc2e017a9550, 0xe9b33b9ce2bb7638
+data8 0x3fdae939540d3f00, 0x3cc545c014943439
+data8 0x9a2fa158b29b649b, 0xe9401a573f8aa706
+data8 0x3fdb2f65f63f6c60, 0x3cd4a63c2f2ca8e2
+data8 0x9aca09f835466186, 0xe8cba69df9f0bf35
+data8 0x3fdb75b5773075e0, 0x3cda310ce1b217ec
+data8 0x9b672266ab1e0136, 0xe855de74266193d4
+data8 0x3fdbbc28606babc0, 0x3cdc84b75cca6c44
+data8 0x9c06f7579f0b7bd5, 0xe7debfd2f98c060b
+data8 0x3fdc02bf3d843420, 0x3cd225d967ffb922
+data8 0x9ca995db058cabdc, 0xe76648a991511c6e
+data8 0x3fdc497a9c224780, 0x3cde08101c5b825b
+data8 0x9d4f0b605ce71e88, 0xe6ec76dcbc02d9a7
+data8 0x3fdc905b0c10d420, 0x3cb1abbaa3edf120
+data8 0x9df765b9eecad5e6, 0xe6714846bdda7318
+data8 0x3fdcd7611f4b8a00, 0x3cbf6217ae80aadf
+data8 0x9ea2b320350540fe, 0xe5f4bab71494cd6b
+data8 0x3fdd1e8d6a0d56c0, 0x3cb726e048cc235c
+data8 0x9f51023562fc5676, 0xe576cbf239235ecb
+data8 0x3fdd65e082df5260, 0x3cd9e66872bd5250
+data8 0xa002620915c2a2f6, 0xe4f779b15f5ec5a7
+data8 0x3fddad5b02a82420, 0x3c89743b0b57534b
+data8 0xa0b6e21c2caf9992, 0xe476c1a233a7873e
+data8 0x3fddf4fd84bbe160, 0x3cbf7adea9ee3338
+data8 0xa16e9264cc83a6b2, 0xe3f4a16696608191
+data8 0x3fde3cc8a6ec6ee0, 0x3cce46f5a51f49c6
+data8 0xa22983528f3d8d49, 0xe3711694552da8a8
+data8 0x3fde84bd099a6600, 0x3cdc78f6490a2d31
+data8 0xa2e7c5d2e2e69460, 0xe2ec1eb4e1e0a5fb
+data8 0x3fdeccdb4fc685c0, 0x3cdd3aedb56a4825
+data8 0xa3a96b5599bd2532, 0xe265b74506fbe1c9
+data8 0x3fdf15241f23b3e0, 0x3cd440f3c6d65f65
+data8 0xa46e85d1ae49d7de, 0xe1ddddb499b3606f
+data8 0x3fdf5d98202994a0, 0x3cd6c44bd3fb745a
+data8 0xa53727ca3e11b99e, 0xe1548f662951b00d
+data8 0x3fdfa637fe27bf60, 0x3ca8ad1cd33054dd
+data8 0xa6036453bdc20186, 0xe0c9c9aeabe5e481
+data8 0x3fdfef0467599580, 0x3cc0f1ac0685d78a
+data8 0xa6d34f1969dda338, 0xe03d89d5281e4f81
+data8 0x3fe01bff067d6220, 0x3cc0731e8a9ef057
+data8 0xa7a6fc62f7246ff3, 0xdfafcd125c323f54
+data8 0x3fe04092d1ae3b40, 0x3ccabda24b59906d
+data8 0xa87e811a861df9b9, 0xdf20909061bb9760
+data8 0x3fe0653df0fd9fc0, 0x3ce94c8dcc722278
+data8 0xa959f2d2dd687200, 0xde8fd16a4e5f88bd
+data8 0x3fe08a00c1cae320, 0x3ce6b888bb60a274
+data8 0xaa3967cdeea58bda, 0xddfd8cabd1240d22
+data8 0x3fe0aedba3221c00, 0x3ced5941cd486e46
+data8 0xab904fd587263c84, 0xdd1f4472e1cf64ed
+data8 0x3fe0e651e85229c0, 0x3cdb6701042299b1
+data8 0xad686d44dd5a74bb, 0xdbf173e1f6b46e92
+data8 0x3fe1309cbf4cdb20, 0x3cbf1be7bb3f0ec5
+data8 0xaf524e15640ebee4, 0xdabd54896f1029f6
+data8 0x3fe17b4ee1641300, 0x3ce81dd055b792f1
+data8 0xb14eca24ef7db3fa, 0xd982cb9ae2f47e41
+data8 0x3fe1c66b9ffd6660, 0x3cd98ea31eb5ddc7
+data8 0xb35ec807669920ce, 0xd841bd1b8291d0b6
+data8 0x3fe211f66db3a5a0, 0x3ca480c35a27b4a2
+data8 0xb5833e4755e04dd1, 0xd6fa0bd3150b6930
+data8 0x3fe25df2e05b6c40, 0x3ca4bc324287a351
+data8 0xb7bd34c8000b7bd3, 0xd5ab9939a7d23aa1
+data8 0x3fe2aa64b32f7780, 0x3cba67314933077c
+data8 0xba0dc64d126cc135, 0xd4564563ce924481
+data8 0x3fe2f74fc9289ac0, 0x3cec1a1dc0efc5ec
+data8 0xbc76222cbbfa74a6, 0xd2f9eeed501125a8
+data8 0x3fe344b82f859ac0, 0x3ceeef218de413ac
+data8 0xbef78e31985291a9, 0xd19672e2182f78be
+data8 0x3fe392a22087b7e0, 0x3cd2619ba201204c
+data8 0xc19368b2b0629572, 0xd02baca5427e436a
+data8 0x3fe3e11206694520, 0x3cb5d0b3143fe689
+data8 0xc44b2ae8c6733e51, 0xceb975d60b6eae5d
+data8 0x3fe4300c7e945020, 0x3cbd367143da6582
+data8 0xc7206b894212dfef, 0xcd3fa6326ff0ac9a
+data8 0x3fe47f965d201d60, 0x3ce797c7a4ec1d63
+data8 0xca14e1b0622de526, 0xcbbe13773c3c5338
+data8 0x3fe4cfb4b09d1a20, 0x3cedfadb5347143c
+data8 0xcd2a6825eae65f82, 0xca34913d425a5ae9
+data8 0x3fe5206cc637e000, 0x3ce2798b38e54193
+data8 0xd06301095e1351ee, 0xc8a2f0d3679c08c0
+data8 0x3fe571c42e3d0be0, 0x3ccd7cb9c6c2ca68
+data8 0xd3c0d9f50057adda, 0xc70901152d59d16b
+data8 0x3fe5c3c0c108f940, 0x3ceb6c13563180ab
+data8 0xd74650a98cc14789, 0xc5668e3d4cbf8828
+data8 0x3fe61668a46ffa80, 0x3caa9092e9e3c0e5
+data8 0xdaf5f8579dcc8f8f, 0xc3bb61b3eed42d02
+data8 0x3fe669c251ad69e0, 0x3cccf896ef3b4fee
+data8 0xded29f9f9a6171b4, 0xc20741d7f8e8e8af
+data8 0x3fe6bdd49bea05c0, 0x3cdc6b29937c575d
+data8 0xe2df5765854ccdb0, 0xc049f1c2d1b8014b
+data8 0x3fe712a6b76c6e80, 0x3ce1ddc6f2922321
+data8 0xe71f7a9b94fcb4c3, 0xbe833105ec291e91
+data8 0x3fe76840418978a0, 0x3ccda46e85432c3d
+data8 0xeb96b72d3374b91e, 0xbcb2bb61493b28b3
+data8 0x3fe7bea9496d5a40, 0x3ce37b42ec6e17d3
+data8 0xf049183c3f53c39b, 0xbad848720223d3a8
+data8 0x3fe815ea59dab0a0, 0x3cb03ad41bfc415b
+data8 0xf53b11ec7f415f15, 0xb8f38b57c53c9c48
+data8 0x3fe86e0c84010760, 0x3cc03bfcfb17fe1f
+data8 0xfa718f05adbf2c33, 0xb70432500286b185
+data8 0x3fe8c7196b9225c0, 0x3ced99fcc6866ba9
+data8 0xfff200c3f5489608, 0xb509e6454dca33cc
+data8 0x3fe9211b54441080, 0x3cb789cb53515688
+// The following table entries are not used
+//data8 0x82e138a0fac48700, 0xb3044a513a8e6132
+//data8 0x3fe97c1d30f5b7c0, 0x3ce1eb765612d1d0
+//data8 0x85f4cc7fc670d021, 0xb0f2fb2ea6cbbc88
+//data8 0x3fe9d82ab4b5fde0, 0x3ced3fe6f27e8039
+//data8 0x89377c1387d5b908, 0xaed58e9a09014d5c
+//data8 0x3fea355065f87fa0, 0x3cbef481d25f5b58
+//data8 0x8cad7a2c98dec333, 0xacab929ce114d451
+//data8 0x3fea939bb451e2a0, 0x3c8e92b4fbf4560f
+//data8 0x905b7dfc99583025, 0xaa748cc0dbbbc0ec
+//data8 0x3feaf31b11270220, 0x3cdced8c61bd7bd5
+//data8 0x9446d8191f80dd42, 0xa82ff92687235baf
+//data8 0x3feb53de0bcffc20, 0x3cbe1722fb47509e
+//data8 0x98758ba086e4000a, 0xa5dd497a9c184f58
+//data8 0x3febb5f571cb0560, 0x3ce0c7774329a613
+//data8 0x9cee6c7bf18e4e24, 0xa37be3c3cd1de51b
+//data8 0x3fec197373bc7be0, 0x3ce08ebdb55c3177
+//data8 0xa1b944000a1b9440, 0xa10b2101b4f27e03
+//data8 0x3fec7e6bd023da60, 0x3ce5fc5fd4995959
+//data8 0xa6defd8ba04d3e38, 0x9e8a4b93cad088ec
+//data8 0x3fece4f404e29b20, 0x3cea3413401132b5
+//data8 0xac69dd408a10c62d, 0x9bf89d5d17ddae8c
+//data8 0x3fed4d2388f63600, 0x3cd5a7fb0d1d4276
+//data8 0xb265c39cbd80f97a, 0x99553d969fec7beb
+//data8 0x3fedb714101e0a00, 0x3cdbda21f01193f2
+//data8 0xb8e081a16ae4ae73, 0x969f3e3ed2a0516c
+//data8 0x3fee22e1da97bb00, 0x3ce7231177f85f71
+//data8 0xbfea427678945732, 0x93d5990f9ee787af
+//data8 0x3fee90ac13b18220, 0x3ce3c8a5453363a5
+//data8 0xc79611399b8c90c5, 0x90f72bde80febc31
+//data8 0x3fef009542b712e0, 0x3ce218fd79e8cb56
+//data8 0xcffa8425040624d7, 0x8e02b4418574ebed
+//data8 0x3fef72c3d2c57520, 0x3cd32a717f82203f
+//data8 0xd93299cddcf9cf23, 0x8af6ca48e9c44024
+//data8 0x3fefe762b77744c0, 0x3ce53478a6bbcf94
+//data8 0xe35eda760af69ad9, 0x87d1da0d7f45678b
+//data8 0x3ff02f511b223c00, 0x3ced6e11782c28fc
+//data8 0xeea6d733421da0a6, 0x84921bbe64ae029a
+//data8 0x3ff06c5c6f8ce9c0, 0x3ce71fc71c1ffc02
+//data8 0xfb3b2c73fc6195cc, 0x813589ba3a5651b6
+//data8 0x3ff0aaf2613700a0, 0x3cf2a72d2fd94ef3
+//data8 0x84ac1fcec4203245, 0xfb73a828893df19e
+//data8 0x3ff0eb367c3fd600, 0x3cf8054c158610de
+//data8 0x8ca50621110c60e6, 0xf438a14c158d867c
+//data8 0x3ff12d51caa6b580, 0x3ce6bce9748739b6
+//data8 0x95b8c2062d6f8161, 0xecb3ccdd37b369da
+//data8 0x3ff1717418520340, 0x3ca5c2732533177c
+//data8 0xa0262917caab4ad1, 0xe4dde4ddc81fd119
+//data8 0x3ff1b7d59dd40ba0, 0x3cc4c7c98e870ff5
+//data8 0xac402c688b72f3f4, 0xdcae469be46d4c8d
+//data8 0x3ff200b93cc5a540, 0x3c8dd6dc1bfe865a
+//data8 0xba76968b9eabd9ab, 0xd41a8f3df1115f7f
+//data8 0x3ff24c6f8f6affa0, 0x3cf1acb6d2a7eff7
+//data8 0xcb63c87c23a71dc5, 0xcb161074c17f54ec
+//data8 0x3ff29b5b338b7c80, 0x3ce9b5845f6ec746
+//data8 0xdfe323b8653af367, 0xc19107d99ab27e42
+//data8 0x3ff2edf6fac7f5a0, 0x3cf77f961925fa02
+//data8 0xf93746caaba3e1f1, 0xb777744a9df03bff
+//data8 0x3ff344df237486c0, 0x3cf6ddf5f6ddda43
+//data8 0x8ca77052f6c340f0, 0xacaf476f13806648
+//data8 0x3ff3a0dfa4bb4ae0, 0x3cfee01bbd761bff
+//data8 0xa1a48604a81d5c62, 0xa11575d30c0aae50
+//data8 0x3ff4030b73c55360, 0x3cf1cf0e0324d37c
+//data8 0xbe45074b05579024, 0x9478e362a07dd287
+//data8 0x3ff46ce4c738c4e0, 0x3ce3179555367d12
+//data8 0xe7a08b5693d214ec, 0x8690e3575b8a7c3b
+//data8 0x3ff4e0a887c40a80, 0x3cfbd5d46bfefe69
+//data8 0x94503d69396d91c7, 0xedd2ce885ff04028
+//data8 0x3ff561ebd9c18cc0, 0x3cf331bd176b233b
+//data8 0xced1d96c5bb209e6, 0xc965278083808702
+//data8 0x3ff5f71d7ff42c80, 0x3ce3301cc0b5a48c
+//data8 0xabac2cee0fc24e20, 0x9c4eb1136094cbbd
+//data8 0x3ff6ae4c63222720, 0x3cf5ff46874ee51e
+//data8 0x8040201008040201, 0xb4d7ac4d9acb1bf4
+//data8 0x3ff7b7d33b928c40, 0x3cfacdee584023bb
+LOCAL_OBJECT_END(T_table)
+
+
+
+.align 16
+// Polynomial coefficients and pi/2 for asinl; r3 is pointed here and C_3..C_9 are loaded first
+LOCAL_OBJECT_START(poly_coeffs)
+ // C_3 ~ 1/6: 64-bit significand, then sign/exponent word (loaded with ldfe, 16-byte stride)
+data8 0xaaaaaaaaaaaaaaab, 0x0000000000003ffc
+ // C_5 ~ 3/40, same layout as C_3
+data8 0x999999999999999a, 0x0000000000003ffb
+ // C_7, C_9 (two doubles, loaded as a pair with ldfpd)
+data8 0x3fa6db6db6db6db7, 0x3f9f1c71c71c71c8
+ // pi/2 (low, high), two doubles
+data8 0x3C91A62633145C07, 0x3FF921FB54442D18
+ // C_11, C_13 (two doubles)
+data8 0x3f96e8ba2e8ba2e9, 0x3f91c4ec4ec4ec4e
+ // C_15, C_17 (two doubles)
+data8 0x3f8c99999999999a, 0x3f87a87878787223
+LOCAL_OBJECT_END(poly_coeffs)
+
+// Integer register names for asinl; note that several names share one physical register
+R_DBL_S = r21 // s as a double-precision bit pattern (getf.d of f8)
+R_EXP0 = r22 // exponent of s with the sign bit cleared
+R_EXP = r15 // sign|exponent of s (getf.exp of f8)
+R_SGNMASK = r23 // 0x20000, the sign bit in the getf.exp layout
+R_TMP = r24 // scratch
+R_TMP2 = r25 // scratch
+R_INDEX = r26 // T_table index, later scaled to a byte offset
+R_TMP3 = r27 // scratch
+R_TMP03 = r27 // same register as R_TMP3
+R_TMP4 = r28 // scratch
+R_TMP5 = r23 // same register as R_SGNMASK
+R_TMP6 = r22 // same register as R_EXP0
+R_TMP7 = r21 // same register as R_DBL_S
+R_T = r29 // bit pattern of t (sign|exponent|b1..b6 1 0..0)
+R_BIAS = r20 // 0xc0, subtracted from the raw table index
+// Floating-point register names for asinl; comments are given only where the visible code shows the role
+F_T = f6 // t
+F_1S2 = f7 // 1-s^2
+F_1S2_S = f9
+F_INV_1T2 = f10 // significand of 1/(1-t^2) from T_table, then scaled to 0.5/(1-t^2)
+F_SQRT_1T2 = f11 // sqrt(1-t^2) (table significand scaled by 2^{-64})
+F_S2T2 = f12 // s+t, then s^2-t^2
+F_X = f13 // x = -(s^2-t^2)/(1-t^2)/2
+F_D = f14 // d = s-t
+F_2M64 = f15 // 2^{-64}
+
+F_CS2 = f32
+F_CS3 = f33 // sqrt coefficient cs3 = 0.5
+F_CS4 = f34 // sqrt coefficient cs4 = -5/8
+F_CS5 = f35 // sqrt coefficient cs5 = 7/8
+F_CS6 = f36 // sqrt coefficient cs6 = -21/16
+F_CS7 = f37 // sqrt coefficient cs7 = 33/16
+F_CS8 = f38 // sqrt coefficient cs8 = -33*13/128
+F_CS9 = f39 // sqrt coefficient cs9 = 55*13/128
+F_S23 = f40
+F_S45 = f41
+F_S67 = f42 // cs7*x+cs6
+F_S89 = f43 // cs9*x+cs8
+F_S25 = f44
+F_S69 = f45
+F_S29 = f46
+F_X2 = f47
+F_X4 = f48
+F_TSQRT = f49 // t*sqrt(1-t^2)
+F_DTX = f50
+F_R = f51
+F_R2 = f52
+F_R3 = f53
+F_R4 = f54
+
+F_C3 = f55 // polynomial coefficient C_3
+F_C5 = f56 // polynomial coefficient C_5
+F_C7 = f57 // polynomial coefficient C_7
+F_C9 = f58 // polynomial coefficient C_9
+F_P79 = f59
+F_P35 = f60
+F_P39 = f61
+
+F_ATHI = f62 // asin(t)_high
+F_ATLO = f63 // asin(t)_low
+
+F_T1 = f64
+F_Y = f65
+F_Y2 = f66
+F_ANDMASK = f67
+F_ORMASK = f68
+F_S = f69
+F_05 = f70
+F_SQRT_1S2 = f71
+F_DS = f72
+F_Z = f73
+F_1T2 = f74
+F_DZ = f75
+F_ZE = f76
+F_YZ = f77
+F_Y1S2 = f78
+F_Y1S2X = f79
+F_1X = f80
+F_ST = f81
+F_1T2_ST = f82
+F_TSS = f83
+F_Y1S2X2 = f84
+F_DZ_TERM = f85
+F_DTS = f86
+F_DS2X = f87
+F_T2 = f88
+F_ZY1S2S = f89
+F_Y1S2_1X = f90
+F_TS = f91
+F_PI2_LO = f92
+F_PI2_HI = f93
+F_S19 = f94
+F_INV1T2_2 = f95
+F_CORR = f96
+F_DZ0 = f97
+
+F_C11 = f98
+F_C13 = f99
+F_C15 = f100
+F_C17 = f101
+F_P1113 = f102
+F_P1517 = f103
+F_P1117 = f104
+F_P317 = f105
+F_R8 = f106
+F_HI = f107
+F_1S2_HI = f108
+F_DS2 = f109
+F_Y2_2 = f110
+F_S2 = f111
+F_S_DS2 = f112
+F_S_1S2S = f113
+F_XL = f114
+F_2M128 = f115
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(asinl)
+
+{.mfi
+ // get exponent, mantissa (rounded to double precision) of s
+ getf.d R_DBL_S = f8
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // r2 = pointer to T_table
+ addl r2 = @ltoff(T_table), gp
+}
+
+{.mfi
+ // sign mask
+ mov R_SGNMASK = 0x20000
+ nop.f 0
+ // bias-63-1
+ mov R_TMP03 = 0xffff-64;;
+}
+
+
+{.mfi
+ // get exponent of s
+ getf.exp R_EXP = f8
+ nop.f 0
+ // R_TMP4 = 2^45
+ shl R_TMP4 = R_SGNMASK, 45-17
+}
+
+{.mlx
+ // load bias-4
+ mov R_TMP = 0xffff-4
+ // load RU(sqrt(2)/2) to integer register (in double format, shifted left by 1)
+ movl R_TMP2 = 0x7fcd413cccfe779a;;
+}
+
+
+{.mfi
+ // load 2^{-64} in FP register
+ setf.exp F_2M64 = R_TMP03
+ nop.f 0
+ // index = (0x7-exponent)|b1 b2.. b6
+ extr.u R_INDEX = R_DBL_S, 46, 9
+}
+
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 x.. x
+ or R_T = R_DBL_S, R_TMP4
+ nop.f 0
+ // R_TMP4 = 2^45-1
+ sub R_TMP4 = R_TMP4, r0, 1;;
+}
+
+
+{.mfi
+ // get t = sign|exponent|b1 b2.. b6 1 0.. 0
+ andcm R_T = R_T, R_TMP4
+ nop.f 0
+ // eliminate sign from R_DBL_S (shift left by 1)
+ shl R_TMP3 = R_DBL_S, 1
+}
+
+{.mfi
+ // R_BIAS = 3*2^6
+ mov R_BIAS = 0xc0
+ nop.f 0
+ // eliminate sign from R_EXP
+ andcm R_EXP0 = R_EXP, R_SGNMASK;;
+}
+
+
+
+{.mfi
+ // load start address for T_table
+ ld8 r2 = [r2]
+ nop.f 0
+ // p8 = 1 if |s|> = sqrt(2)/2
+ cmp.geu p8, p0 = R_TMP3, R_TMP2
+}
+
+{.mlx
+ // p7 = 1 if |s| is below 2^{-4} (exponent of s is below bias-4, held in R_TMP)
+ cmp.lt p7, p0 = R_EXP0, R_TMP
+ // sqrt coefficient cs8 = -33*13/128 (single-precision bit pattern);
+ // R_TMP2 can be reused because p8 has already been computed from its old value
+ movl R_TMP2 = 0xc0568000;;
+}
+
+{.mbb
+ // load t in FP register (F_T feeds d = s-t and s+t on the main path)
+ setf.d F_T = R_T
+ // if |s| is below 2^{-4}, take alternate path
+ (p7) br.cond.spnt SMALL_S
+ // if |s| is at least sqrt(2)/2, take alternate path
+ (p8) br.cond.sptk LARGE_S
+}
+
+{.mlx
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ // sqrt coefficient cs9 = 55*13/128
+ movl R_TMP = 0x40b2c000;;
+}
+
+
+{.mfi
+ // sqrt coefficient cs8 = -33*13/128
+ setf.s F_CS8 = R_TMP2
+ nop.f 0
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5
+}
+
+{.mfi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ mov R_TMP4 = 0xffff - 1
+ nop.f 0
+ // sqrt coefficient cs6 = -21/16
+ mov R_TMP6 = 0xbfa8;;
+}
+
+
+{.mlx
+ // table index
+ add r2 = r2, R_INDEX
+ // sqrt coefficient cs7 = 33/16
+ movl R_TMP2 = 0x40040000;;
+}
+
+
+{.mmi
+ // load cs9 = 55*13/128
+ setf.s F_CS9 = R_TMP
+ // sqrt coefficient cs5 = 7/8
+ mov R_TMP3 = 0x3f60
+ // sqrt coefficient cs6 = 21/16
+ shl R_TMP6 = R_TMP6, 16;;
+}
+
+
+{.mmi
+ // load significand of 1/(1-t^2)
+ ldf8 F_INV_1T2 = [r2], 8
+ // sqrt coefficient cs7 = 33/16
+ setf.s F_CS7 = R_TMP2
+ // sqrt coefficient cs4 = -5/8
+ mov R_TMP5 = 0xbf20;;
+}
+
+
+{.mmi
+ // load significand of sqrt(1-t^2)
+ ldf8 F_SQRT_1T2 = [r2], 8
+ // sqrt coefficient cs6 = 21/16
+ setf.s F_CS6 = R_TMP6
+ // sqrt coefficient cs5 = 7/8
+ shl R_TMP3 = R_TMP3, 16;;
+}
+
+
+{.mmi
+ // sqrt coefficient cs3 = 0.5 (set exponent = bias-1)
+ setf.exp F_CS3 = R_TMP4
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // sqrt coefficient cs4 = -5/8
+ shl R_TMP5 = R_TMP5, 16;;
+}
+
+
+{.mfi
+ // sqrt coefficient cs5 = 7/8
+ setf.s F_CS5 = R_TMP3
+ // d = s-t
+ fms.s1 F_D = f8, f1, F_T
+ // set p6 = 1 if s<0, p11 = 1 if s> = 0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
+}
+
+{.mfi
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // s+t
+ fma.s1 F_S2T2 = f8, f1, F_T
+ nop.i 0;;
+}
+
+
+{.mfi
+ // sqrt coefficient cs4 = -5/8
+ setf.s F_CS4 = R_TMP5
+ // s^2-t^2
+ fma.s1 F_S2T2 = F_S2T2, F_D, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // 0.5/(1-t^2) = 2^{-64}*(2^63/(1-t^2))
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+{.mfi
+ // load C_5
+ ldfe F_C5 = [r3], 16
+ // set correct exponent for sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load C_7, C_9
+ ldfpd F_C7, F_C9 = [r3]
+ // x = -(s^2-t^2)/(1-t^2)/2
+ fnma.s1 F_X = F_INV_1T2, F_S2T2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // t*sqrt(1-t^2)
+ fma.s1 F_TSQRT = F_T, F_SQRT_1T2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // cs9*x+cs8
+ fma.s1 F_S89 = F_CS9, F_X, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // cs7*x+cs6
+ fma.s1 F_S67 = F_CS7, F_X, F_CS6
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // cs5*x+cs4
+ fma.s1 F_S45 = F_CS5, F_X, F_CS4
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x*x
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (s-t)-t*x
+ fnma.s1 F_DTX = F_T, F_X, F_D
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // cs3*x+cs2 (cs2 = -0.5 = -cs3)
+ fms.s1 F_S23 = F_CS3, F_X, F_CS3
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // cs9*x^3+cs8*x^2+cs7*x+cs6
+ fma.s1 F_S69 = F_S89, F_X2, F_S67
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // t*sqrt(1-t^2)*x^2
+ fma.s1 F_TSQRT = F_TSQRT, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S25 = F_S45, F_X2, F_S23
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // ((s-t)-t*x)*sqrt(1-t^2)
+ fma.s1 F_DTX = F_DTX, F_SQRT_1T2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_low
+ (p6) fnma.s1 F_ATLO = F_ATLO, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // PS29 = cs9*x^7+..+cs5*x^3+cs4*x^2+cs3*x+cs2
+ fma.s1 F_S29 = F_S69, F_X4, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if sign is negative, negate table values: asin(t)_high
+ (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R = ((s-t)-t*x)*sqrt(1-t^2)-t*sqrt(1-t^2)*x^2*PS29
+ fnma.s1 F_R = F_S29, F_TSQRT, F_DTX
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_P39 = F_P39, f1, F_R
+ nop.i 0;;
+}
+
+
+{.mfb
+ nop.m 0
+ // result = asin(t)_high+R+asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s0 f8 = F_ATHI, f1, F_P39
+ // return
+ br.ret.sptk b0;;
+}
+
+
+
+
+LARGE_S:
+
+{.mfi
+ // bias-1
+ mov R_TMP3 = 0xffff - 1
+ // y ~ 1/sqrt(1-s^2)
+ frsqrta.s1 F_Y, p7 = F_1S2
+ // c9 = 55*13*17/128
+ mov R_TMP4 = 0x10af7b
+}
+
+{.mlx
+ // c8 = -33*13*15/128
+ mov R_TMP5 = 0x184923
+ movl R_TMP2 = 0xff00000000000000;;
+}
+
+{.mfi
+ // set p6 = 1 if s<0, p11 = 1 if s>0
+ cmp.ge p6, p11 = R_EXP, R_DBL_S
+ // 1-s^2
+ fnma.s1 F_1S2 = f8, f8, f1
+ // set p9 = 1
+ cmp.eq p9, p0 = r0, r0;;
+}
+
+
+{.mfi
+ // load 0.5
+ setf.exp F_05 = R_TMP3
+ // (1-s^2) rounded to single precision
+ fnma.s.s1 F_1S2_S = f8, f8, f1
+ // c9 = 55*13*17/128
+ shl R_TMP4 = R_TMP4, 10
+}
+
+{.mlx
+ // AND mask for getting t ~ sqrt(1-s^2)
+ setf.sig F_ANDMASK = R_TMP2
+ // OR mask
+ movl R_TMP2 = 0x0100000000000000;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (s^2)_s
+ fma.s.s1 F_S2 = f8, f8, f0
+ nop.i 0;;
+}
+
+
+{.mmi
+ // c9 = 55*13*17/128
+ setf.s F_CS9 = R_TMP4
+ // c7 = 33*13/16
+ mov R_TMP4 = 0x41d68
+ // c8 = -33*13*15/128
+ shl R_TMP5 = R_TMP5, 11;;
+}
+
+
+{.mfi
+ setf.sig F_ORMASK = R_TMP2
+ // y^2
+ fma.s1 F_Y2 = F_Y, F_Y, f0
+ // c7 = 33*13/16
+ shl R_TMP4 = R_TMP4, 12
+}
+
+{.mfi
+ // c6 = -33*7/16
+ mov R_TMP6 = 0xc1670
+ // y' ~ sqrt(1-s^2)
+ fma.s1 F_T1 = F_Y, F_1S2, f0
+ // c5 = 63/8
+ mov R_TMP7 = 0x40fc;;
+}
+
+
+{.mlx
+ // load c8 = -33*13*15/128
+ setf.s F_CS8 = R_TMP5
+ // c4 = -35/8
+ movl R_TMP5 = 0xc08c0000;;
+}
+
+{.mfi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp
+ // 1-(1-s^2)_s
+ fnma.s1 F_DS = F_1S2_S, f1, f1
+ // p9 = 0 if p7 = 1 (p9 = 1 for special cases only)
+ (p7) cmp.ne p9, p0 = r0, r0
+}
+
+{.mlx
+ // load c7 = 33*13/16
+ setf.s F_CS7 = R_TMP4
+ // c3 = 5/2
+ movl R_TMP4 = 0x40200000;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-(s^2)_s
+ fnma.s1 F_S_1S2S = F_S2, f1, f1
+ nop.i 0
+}
+
+{.mlx
+ // load c4 = -35/8
+ setf.s F_CS4 = R_TMP5
+ // c2 = -3/2
+ movl R_TMP5 = 0xbfc00000;;
+}
+
+
+{.mfi
+ // load c3 = 5/2
+ setf.s F_CS3 = R_TMP4
+ // x = (1-s^2)_s*y^2-1
+ fms.s1 F_X = F_1S2_S, F_Y2, f1
+ // c6 = -33*7/16
+ shl R_TMP6 = R_TMP6, 12
+}
+
+{.mfi
+ nop.m 0
+ // y^2/2
+ fma.s1 F_Y2_2 = F_Y2, F_05, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // load c6 = -33*7/16
+ setf.s F_CS6 = R_TMP6
+ // eliminate lower bits from y'
+ fand F_T = F_T1, F_ANDMASK
+ // c5 = 63/8
+ shl R_TMP7 = R_TMP7, 16
+}
+
+{.mfb
+ // r3 = load start address to polynomial coefficients
+ ld8 r3 = [r3]
+ // 1-(1-s^2)_s-s^2
+ fnma.s1 F_DS = f8, f8, F_DS
+ // p9 = 1 if s is a special input (NaN, or |s|> = 1)
+ (p9) br.cond.spnt ASINL_SPECIAL_CASES;;
+}
+
+{.mmf
+ // get exponent, significand of y' (in single prec.)
+ getf.s R_TMP = F_T1
+ // load c2 = -3/2
+ setf.s F_CS2 = R_TMP5
+ // y*(1-s^2)
+ fma.s1 F_Y1S2 = F_Y, F_1S2, f0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // x' = (y^2/2)*(1-(s^2)_s)-0.5
+ fms.s1 F_XL = F_Y2_2, F_S_1S2S, F_05
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // s^2-(s^2)_s
+ fms.s1 F_S_DS2 = f8, f8, F_S2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if s<0, set s = -s
+ (p6) fnma.s1 f8 = f8, f1, f0
+ nop.i 0;;
+}
+
+{.mfi
+ // load c5 = 63/8
+ setf.s F_CS5 = R_TMP7
+ // x = (1-s^2)_s*y^2-1+(1-(1-s^2)_s-s^2)*y^2
+ fma.s1 F_X = F_DS, F_Y2, F_X
+ // for t = 2^k*1.b1 b2.., get 7-k|b1.. b6
+ extr.u R_INDEX = R_TMP, 17, 9;;
+}
+
+
+{.mmi
+ // index = (4-exponent)|b1 b2.. b6
+ sub R_INDEX = R_INDEX, R_BIAS
+ nop.m 0
+ // get exponent of y
+ shr.u R_TMP2 = R_TMP, 23;;
+}
+
+{.mmi
+ // load C3
+ ldfe F_C3 = [r3], 16
+ // set p8 = 1 if y'<2^{-4}
+ cmp.gt p8, p0 = 0x7b, R_TMP2
+ // shift R_INDEX by 5
+ shl R_INDEX = R_INDEX, 5;;
+}
+
+
+{.mfb
+ // get table index for sqrt(1-t^2)
+ add r2 = r2, R_INDEX
+ // get t = 2^k*1.b1 b2.. b7 1
+ for F_T = F_T, F_ORMASK
+ (p8) br.cond.spnt VERY_LARGE_INPUT;;
+}
+
+
+
+{.mmf
+ // load C5
+ ldfe F_C5 = [r3], 16
+ // load 1/(1-t^2)
+ ldfp8 F_INV_1T2, F_SQRT_1T2 = [r2], 16
+ // x = ((1-s^2)*y^2-1)/2
+ fma.s1 F_X = F_X, F_05, f0;;
+}
+
+
+
+{.mmf
+ nop.m 0
+ // C7, C9
+ ldfpd F_C7, F_C9 = [r3], 16
+ // set correct exponent for t
+ fmerge.se F_T = F_T1, F_T;;
+}
+
+
+
+{.mfi
+ // pi/2 (low, high)
+ ldfpd F_PI2_LO, F_PI2_HI = [r3]
+ // c9*x+c8
+ fma.s1 F_S89 = F_X, F_CS9, F_CS8
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^2
+ fma.s1 F_X2 = F_X, F_X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x
+ fma.s1 F_Y1S2X = F_Y1S2, F_X, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c7*x+c6
+ fma.s1 F_S67 = F_X, F_CS7, F_CS6
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-x
+ fnma.s1 F_1X = F_X, f1, f1
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3*x+c2
+ fma.s1 F_S23 = F_X, F_CS3, F_CS2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-t^2
+ fnma.s1 F_1T2 = F_T, F_T, f1
+ nop.i 0
+}
+
+{.mfi
+ // load asin(t)_high, asin(t)_low
+ ldfpd F_ATHI, F_ATLO = [r2]
+ // c5*x+c4
+ fma.s1 F_S45 = F_X, F_CS5, F_CS4
+ nop.i 0;;
+}
+
+
+
+{.mfi
+ nop.m 0
+ // t*s
+ fma.s1 F_TS = F_T, f8, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // 0.5/(1-t^2)
+ fma.s1 F_INV_1T2 = F_INV_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // z~sqrt(1-t^2), rounded to 24 significant bits
+ fma.s.s1 F_Z = F_SQRT_1T2, F_2M64, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // sqrt(1-t^2)
+ fma.s1 F_SQRT_1T2 = F_SQRT_1T2, F_2M64, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)*x^2
+ fma.s1 F_Y1S2X2 = F_Y1S2, F_X2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // x^4
+ fma.s1 F_X4 = F_X2, F_X2, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // s*t rounded to 24 significant bits
+ fma.s.s1 F_TSS = F_T, f8, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c9*x^3+..+c6
+ fma.s1 F_S69 = F_X2, F_S89, F_S67
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // ST = (t^2-1+s^2) rounded to 24 significant bits
+ fms.s.s1 F_ST = f8, f8, F_1T2
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c5*x^3+..+c2
+ fma.s1 F_S25 = F_X2, F_S45, F_S23
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 0.25/(1-t^2)
+ fma.s1 F_INV1T2_2 = F_05, F_INV_1T2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // t*s-sqrt(1-t^2)*(1-s^2)*y
+ fnma.s1 F_TS = F_Y1S2, F_SQRT_1T2, F_TS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z*0.5/(1-t^2)
+ fma.s1 F_ZE = F_INV_1T2, F_SQRT_1T2, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // z^2+t^2-1
+ fms.s1 F_DZ0 = F_Z, F_Z, F_1T2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DS2X = F_X, F_DS, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // t*s-(t*s)_s
+ fms.s1 F_DTS = F_T, f8, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c9*x^7+..+c2
+ fma.s1 F_S29 = F_X4, F_S69, F_S25
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*z
+ fma.s1 F_YZ = F_Z, F_Y, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // t^2
+ fma.s1 F_T2 = F_T, F_T, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // 1-t^2+ST
+ fma.s1 F_1T2_ST = F_ST, f1, F_1T2
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // y*(1-s^2)(1-x)
+ fma.s1 F_Y1S2_1X = F_Y1S2, F_1X, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // dz ~ sqrt(1-t^2)-z
+ fma.s1 F_DZ = F_DZ0, F_ZE, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // -1+correction for sqrt(1-t^2)-z
+ fnma.s1 F_CORR = F_INV1T2_2, F_DZ0, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2+x)*y*(1-s^2)
+ fma.s1 F_S19 = F_Y1S2X2, F_S29, F_Y1S2X
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z*y*(1-s^2)_s
+ fma.s1 F_ZY1S2S = F_YZ, F_1S2_S, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // s^2-(1-t^2+ST)
+ fms.s1 F_1T2_ST = f8, f8, F_1T2_ST
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x
+ fma.s1 F_DTS = F_YZ, F_DS2X, F_DTS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ, F_Y1S2_1X, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R = t*s-sqrt(1-t^2)*(1-s^2)*y+sqrt(1-t^2)*(1-s^2)*y*PS19
+ // (used for polynomial evaluation)
+ fma.s1 F_R = F_S19, F_SQRT_1T2, F_TS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (PS29*x^2)*y*(1-s^2)
+ fma.s1 F_S29 = F_Y1S2X2, F_S29, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // apply correction to dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, F_CORR, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // R^2
+ fma.s1 F_R2 = F_R, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s-(t*s)_s)+z*y*(1-s^2-(1-s^2)_s)*x+dz*y*(1-s^2)*(1-x)
+ fma.s1 F_DZ_TERM = F_DZ_TERM, f1, F_DTS
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // c7+c9*R^2
+ fma.s1 F_P79 = F_C9, F_R2, F_C7
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2
+ fma.s1 F_P35 = F_C5, F_R2, F_C3
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // asin(t)_low-(pi/2)_low
+ fms.s1 F_ATLO = F_ATLO, f1, F_PI2_LO
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // R^4
+ fma.s1 F_R4 = F_R2, F_R2, f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // R^3
+ fma.s1 F_R3 = F_R2, F_R, f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z
+ fnma.s1 F_TSS = F_T2, F_YZ, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)
+ fma.s1 F_DZ_TERM = F_YZ, F_1T2_ST, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (pi/2)_hi-asin(t)_hi
+ fms.s1 F_ATHI = F_PI2_HI, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // c3+c5*R^2+c7*R^4+c9*R^6
+ fma.s1 F_P39 = F_P79, F_R4, F_P35
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST)+
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29
+ fma.s1 F_DZ_TERM = F_SQRT_1T2, F_S29, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_TSS = F_YZ, F_ST, F_TSS
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // -asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fms.s1 F_P39 = F_P39, F_R3, F_ATLO
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // if s<0, change sign of F_ATHI
+ (p6) fnma.s1 F_ATHI = F_ATHI, f1, f0
+ nop.i 0
+}
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_P39, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6)
+ fma.s1 F_DZ_TERM = F_ZY1S2S, F_X, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // d(ts)+z*y*d(1-s^2)*x+dz*y*(1-s^2)*(1-x)+z*y*(s^2-1+t^2-ST) +
+ // + sqrt(1-t^2)*y*(1-s^2)*x^2*PS29 + z*y*(1-s^2)_s*x +
+ // - asin(t)_low+R^3*(c3+c5*R^2+c7*R^4+c9*R^6) +
+ // + (t*s)_s-t^2*y*z+z*y*ST
+ fma.s1 F_DZ_TERM = F_TSS, f1, F_DZ_TERM
+ nop.i 0;;
+}
+
+
+.pred.rel "mutex", p6, p11
+{.mfi
+ nop.m 0
+ // result: add high part of pi/2-table value
+ // s>0 in this case
+ (p11) fma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ nop.i 0
+}
+
+{.mfb
+ nop.m 0
+ // result: add high part of pi/2-table value
+ // if s<0
+ (p6) fnma.s0 f8 = F_DZ_TERM, f1, F_ATHI
+ br.ret.sptk b0;;
+}
+
+
+
+
+
+
+SMALL_S:
+
+ // use 15-term polynomial approximation
+
+{.mmi
+ // r3 = pointer to polynomial coefficients
+ addl r3 = @ltoff(poly_coeffs), gp;;
+ // load start address for coefficients
+ ld8 r3 = [r3]
+ mov R_TMP = 0x3fbf;;
+}
+
+
+{.mmi
+ add r2 = 64, r3
+ ldfe F_C3 = [r3], 16
+ // p7 = 1 if |s|<2^{-64} (exponent of s atan2(V/U) where U >= V. If Y > X, we must swap.
+//
+// p6 swap True |Y| > |X|
+// p7 swap False |Y| <= |X|
+// p8 X+ (If swap=True p8=p9=0)
+// p9 X-
+//
+// all the other predicates p10 thru p15 are false for the main path
+//
+// Simple trigonometric identities show
+// Region 1 (-45 to +45 degrees):
+// X>0, |Y|<=X, V=Y, U=X atan2(Y,X) = sgnY * (0 + atan(V/U))
+//
+// Region 2 (-90 to -45 degrees, and +45 to +90 degrees):
+// X>0, |Y|>X, V=X, U=Y atan2(Y,X) = sgnY * (pi/2 - atan(V/U))
+//
+// Region 3 (-135 to -90 degrees, and +90 to +135 degrees):
+// X<0, |Y|>|X|, V=X, U=Y atan2(Y,X) = sgnY * (pi/2 + atan(V/U))
+//
+// Region 4 (-180 to -135 degrees, and +135 to +180 degrees):
+// X<0, |Y|<=|X|, V=Y, U=X atan2(Y,X) = sgnY * (pi - atan(V/U))
+//
+// So the result is always of the form atan2(Y,X) = P + sgnXY * atan(V/U)
+//
+// We compute atan(V/U) from the identity
+// atan(V/U) = atan(z) + atan([(V/U)-z] / [1+(V/U)z])
+// where z is a limited-precision approximation (16 bits) to V/U
+//
+// z is calculated with the assistance of the frcpa instruction.
+//
+// atan(z) is calculated by a polynomial z + z^3 * p(w), w=z^2
+// where p(w) = P0+P1*w+...+P22*w^22
+//
+// Let d = [(V/U)-z] / [1+(V/U)z] = (V-U*z)/(U+V*z)
+//
+// Approximate atan(d) by d + P0*d^3
+// Let F = 1/(U+V*z) * (1-a), where |a|< 2^-8.8.
+// Compute q(a) = 1 + a + ... + a^5.
+// Then F*q(a) approximates the reciprocal to more than 50 bits.
+
+// Special values
+//==============================================================
+// Y X Result
+// +number +inf +0
+// -number +inf -0
+// +number -inf +pi
+// -number -inf -pi
+//
+// +inf +number +pi/2
+// -inf +number -pi/2
+// +inf -number +pi/2
+// -inf -number -pi/2
+//
+// +inf +inf +pi/4
+// -inf +inf -pi/4
+// +inf -inf +3pi/4
+// -inf -inf -3pi/4
+//
+// +1 +1 +pi/4
+// -1 +1 -pi/4
+// +1 -1 +3pi/4
+// -1 -1 -3pi/4
+//
+// +number +0 +pi/2
+// -number +0 -pi/2
+// +number -0 +pi/2
+// -number -0 -pi/2
+//
+// +0 +number +0
+// -0 +number -0
+// +0 -number +pi
+// -0 -number -pi
+//
+// +0 +0 +0
+// -0 +0 -0
+// +0 -0 +pi
+// -0 -0 -pi
+//
+// NaN anything quiet Y
+// Not NaN NaN quiet X
+
+// atan2(+-0, +-0) sets double error tag to 37
+
+// Registers used
+//==============================================================
+
+// predicate registers used:
+// p6 -> p15
+
+// floating-point registers used:
+// f8, f9 input
+// f32 -> f119
+
+// general registers used
+// r32 -> r41
+
+// Assembly macros: symbolic register names used by atan2 and its error path
+//==============================================================
+
+EXP_AD_P1 = r33 // walks atan2_tb1: P11 down to P0, then pi
+EXP_AD_P2 = r34 // EXP_AD_P1+0xd0: P21 down to P12, P22, pi/2, pi/4, 3pi/4
+rsig_near_one = r35 // holds 0x8000000000000001; shares r35 with GR_SAVE_B0
+
+
+GR_SAVE_B0 = r35 // error path: copy of b0
+GR_SAVE_GP = r36 // error path: copy of gp
+GR_SAVE_PFS = r37 // error path: copy of ar.pfs
+
+GR_Parameter_X = r38 // error path: address sp+16 (f9 is stored there)
+GR_Parameter_Y = r39 // error path: address sp+32 (f8 is stored there)
+GR_Parameter_RESULT = r40 // error path: address sp+48 (f10 is stored there)
+atan2_GR_tag = r41 // error tag, set to 37 when x=0 and y=0
+
+atan2_Y = f8 // first argument; f8 also receives the result
+atan2_X = f9 // second argument
+
+atan2_u1_X = f32 // frcpa approximation of 1/X
+atan2_u1_Y = f33 // frcpa approximation of 1/Y
+atan2_z2_X = f34 // u1_X * Y^2
+atan2_z2_Y = f35 // u1_Y * X^2
+
+atan2_two = f36 // constant 2.0
+atan2_B1sq_Y = f37 // B1Y^2
+atan2_z1_X = f38 // u1_X * Y
+atan2_z1_Y = f39 // u1_Y * X
+atan2_B1X = f40 // 2 - u1_X*X
+
+atan2_B1Y = f41 // 2 - u1_Y*Y
+atan2_wp_X = f42 // z1_X^2
+atan2_B1sq_X = f43 // B1X^2
+atan2_z = f44 // z, the approximation to V/U
+atan2_w = f45 // w, used as z^2 in the polynomial
+
+atan2_P0 = f46 // P0..P22: coefficients of p(w), loaded from the tables
+atan2_P1 = f47
+atan2_P2 = f48
+atan2_P3 = f49
+atan2_P4 = f50
+
+atan2_P5 = f51
+atan2_P6 = f52
+atan2_P7 = f53
+atan2_P8 = f54
+atan2_P9 = f55
+
+atan2_P10 = f56
+atan2_P11 = f57
+atan2_P12 = f58
+atan2_P13 = f59
+atan2_P14 = f60
+
+atan2_P15 = f61
+atan2_P16 = f62
+atan2_P17 = f63
+atan2_P18 = f64
+atan2_P19 = f65
+
+atan2_P20 = f66
+atan2_P21 = f67
+atan2_P22 = f68
+atan2_tmp = f68 // same register as atan2_P22; target of the force-inexact multiply
+atan2_pi_by_2 = f69
+atan2_sgn_pi_by_2 = f69 // same register: pi/2 is overwritten by sgnY*pi/2
+atan2_V13 = f70 // V*/W*: partial sums of p(w), see the fma chain in atan2
+
+atan2_W11 = f71
+atan2_E = f72 // value whose reciprocal F is taken with frcpa
+atan2_wp_Y = f73 // z1_Y^2
+atan2_V11 = f74
+atan2_V12 = f75 // w^2
+
+atan2_V7 = f76
+atan2_V8 = f77
+atan2_W7 = f78
+atan2_W8 = f79
+atan2_W3 = f80
+
+atan2_W4 = f81
+atan2_V3 = f82
+atan2_V4 = f83
+atan2_F = f84 // frcpa approximation of 1/E
+atan2_gV = f85 // V - U*z
+
+atan2_V10 = f86
+atan2_zcub = f87 // z*w
+atan2_V6 = f88
+atan2_V9 = f89 // V12^2 = w^4
+atan2_W10 = f90
+
+atan2_W6 = f91
+atan2_W2 = f92
+atan2_V2 = f93
+atan2_alpha = f94 // 1 - E*F
+atan2_alpha_1 = f95 // 2 - E*F
+
+atan2_gVF = f96 // gV*F
+atan2_V5 = f97
+atan2_W12 = f98 // V9^2, later multiplied by V9 again
+atan2_W5 = f99
+atan2_alpha_sq = f100 // alpha^2
+
+atan2_Cp = f101 // 1 + alpha*alpha_1
+atan2_V1 = f102
+atan2_ysq = f103 // Y^2
+atan2_W1 = f104
+atan2_alpha_cub = f105 // alpha^3
+
+atan2_C = f106 // gVF*Cp
+atan2_xsq = f107 // X^2
+atan2_d = f108 // C + alpha^3*C
+atan2_A_hi = f109 // z + zcub*Pp
+atan2_dsq = f110 // d^2
+
+atan2_pd = f111 // P0*d
+atan2_A_lo = f112 // d + pd*dsq
+atan2_A = f113 // A_hi + A_lo
+atan2_Pp = f114 // V1 + W12*W1
+atan2_sgnY = f115 // 1.0 carrying the sign of Y
+
+atan2_sig_near_one = f116
+atan2_near_one = f116 // same register: rebuilt in place by fmerge.se with f1
+atan2_pi = f117
+atan2_sgn_pi = f117 // same register: pi is overwritten by sgnY*pi
+atan2_3pi_by_4 = f118
+atan2_pi_by_4 = f119
+
+
+/////////////////////////////////////////////////////////////
+
+
+RODATA
+// Constant tables for atan2. Each entry is a data8 significand plus a data8 sign/exponent word.
+.align 16
+// Entries are 16 bytes apart; atan2 reads them with ldfe and a post-increment of 16.
+LOCAL_OBJECT_START(atan2_tb1)
+data8 0xA21922DC45605EA1 , 0x00003FFA // P11 (first load through EXP_AD_P1)
+data8 0xB199DD6D2675C40F , 0x0000BFFA // P10
+data8 0xC2F01E5DDD100DBE , 0x00003FFA // P9
+data8 0xD78F28FC2A592781 , 0x0000BFFA // P8
+data8 0xF0F03ADB3FC930D3 , 0x00003FFA // P7
+data8 0x88887EBB209E3543 , 0x0000BFFB // P6
+data8 0x9D89D7D55C3287A5 , 0x00003FFB // P5
+data8 0xBA2E8B9793955C77 , 0x0000BFFB // P4
+data8 0xE38E38E320A8A098 , 0x00003FFB // P3
+data8 0x9249249247E37913 , 0x0000BFFC // P2
+data8 0xCCCCCCCCCCC906CD , 0x00003FFC // P1
+data8 0xAAAAAAAAAAAAA8A9 , 0x0000BFFD // P0
+data8 0xC90FDAA22168C235 , 0x00004000 // pi (13th entry: table size is 13*16 = 0xd0 bytes)
+LOCAL_OBJECT_END(atan2_tb1)
+// NOTE(review): atan2 reads P21 at atan2_tb1+0xd0, so atan2_tb2 must directly follow atan2_tb1
+LOCAL_OBJECT_START(atan2_tb2)
+data8 0xCE585A259BD8374C , 0x00003FF0 // P21 (first load through EXP_AD_P2)
+data8 0x9F90FB984D8E39D0 , 0x0000BFF3 // P20
+data8 0x9D3436AABE218776 , 0x00003FF5 // P19
+data8 0xDEC343E068A6D2A8 , 0x0000BFF6 // P18
+data8 0xF396268151CFB11C , 0x00003FF7 // P17
+data8 0xD818B4BB43D84BF2 , 0x0000BFF8 // P16
+data8 0xA2270D30A90AA220 , 0x00003FF9 // P15
+data8 0xD5F4F2182E7A8725 , 0x0000BFF9 // P14
+data8 0x80D601879218B53A , 0x00003FFA // P13
+data8 0x9297B23CCFFB291F , 0x0000BFFA // P12
+data8 0xFE7E52D2A89995B3 , 0x0000BFEC // P22 (stored after P12, matching the load order)
+data8 0xC90FDAA22168C235 , 0x00003FFF // pi/2
+data8 0xC90FDAA22168C235 , 0x00003FFE // pi/4
+data8 0x96cbe3f9990e91a8 , 0x00004000 // 3pi/4
+LOCAL_OBJECT_END(atan2_tb2)
+
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(atan2)
+// atan2(Y = f8, X = f9): the double-precision result is produced in f8
+{ .mfi
+ alloc r32 = ar.pfs,1,5,4,0 // 1 input, 5 locals (r33-r37), 4 outputs (r38-r41)
+ frcpa.s1 atan2_u1_X,p6 = f1,atan2_X // u1_X ~ 1/X
+ nop.i 999
+}
+{ .mfi
+ addl EXP_AD_P1 = @ltoff(atan2_tb1), gp // linkage-table slot for atan2_tb1
+ fma.s1 atan2_two = f1,f1,f1 // two = 1*1 + 1 = 2.0
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ld8 EXP_AD_P1 = [EXP_AD_P1] // EXP_AD_P1 = address of atan2_tb1
+ frcpa.s1 atan2_u1_Y,p7 = f1,atan2_Y // u1_Y ~ 1/Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_xsq = atan2_X,atan2_X,f0 // xsq = X^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fclass.m p10,p0 = atan2_Y, 0xc3 // Test for y=nan
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_ysq = atan2_Y,atan2_Y,f0 // ysq = Y^2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ add EXP_AD_P2 = 0xd0,EXP_AD_P1 // second pointer, 0xd0 bytes past atan2_tb1 (P21 is read there)
+ fclass.m p12,p0 = atan2_X, 0xc3 // Test for x nan
+ nop.i 999
+}
+;;
+
+
+// p10: Y is NaN, quiet it and return
+{ .mfi
+ ldfe atan2_P11 = [EXP_AD_P1],16
+ fmerge.s atan2_sgnY = atan2_Y,f1 // sgnY = 1.0 with the sign of Y
+ nop.i 999
+}
+{ .mfb
+ ldfe atan2_P21 = [EXP_AD_P2],16
+(p10) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If y=nan, result quietized y
+(p10) br.ret.spnt b0 // Exit if y=nan
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P10 = [EXP_AD_P1],16
+ fma.s1 atan2_z1_X = atan2_u1_X, atan2_Y, f0 // z1_X = u1_X*Y
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P20 = [EXP_AD_P2],16
+ fnma.s1 atan2_B1X = atan2_u1_X, atan2_X, atan2_two // B1X = 2 - u1_X*X
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ldfe atan2_P9 = [EXP_AD_P1],16
+ fma.s1 atan2_z1_Y = atan2_u1_Y, atan2_X, f0 // z1_Y = u1_Y*X
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P19 = [EXP_AD_P2],16
+ fnma.s1 atan2_B1Y = atan2_u1_Y, atan2_Y, atan2_two // B1Y = 2 - u1_Y*Y
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe atan2_P8 = [EXP_AD_P1],16
+ fma.s1 atan2_z2_X = atan2_u1_X, atan2_ysq, f0 // z2_X = u1_X*Y^2
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P18 = [EXP_AD_P2],16
+ fma.s1 atan2_z2_Y = atan2_u1_Y, atan2_xsq, f0 // z2_Y = u1_Y*X^2
+ nop.i 999
+}
+;;
+
+// p10 ==> x inf y ?
+// p11 ==> x !inf y ?
+{ .mfi
+ ldfe atan2_P7 = [EXP_AD_P1],16
+ fclass.m p10,p11 = atan2_X, 0x23 // test for x inf
+ nop.i 999
+}
+{ .mfb
+ ldfe atan2_P17 = [EXP_AD_P2],16
+(p12) fma.d.s0 f8 = atan2_X,atan2_Y,f0 // If x nan, result quiet x
+(p12) br.ret.spnt b0 // Exit for x nan
+;;
+}
+
+// p6 true if swap, means |y| > |x| or ysq > xsq
+// p7 true if no swap, means |x| >= |y| or xsq >= ysq
+{ .mmf
+ ldfe atan2_P6 = [EXP_AD_P1],16
+ ldfe atan2_P16 = [EXP_AD_P2],16
+ fcmp.ge.s1 p7,p6 = atan2_xsq, atan2_ysq // replaces the p6/p7 values left by the two frcpa
+;;
+}
+
+{ .mfi
+ ldfe atan2_P5 = [EXP_AD_P1],16
+ fma.s1 atan2_wp_X = atan2_z1_X, atan2_z1_X, f0 // wp_X = z1_X^2
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P15 = [EXP_AD_P2],16
+ fma.s1 atan2_B1sq_X = atan2_B1X, atan2_B1X, f0 // B1sq_X = B1X^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ldfe atan2_P4 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_wp_Y = atan2_z1_Y, atan2_z1_Y, f0 // swap: wp_Y = z1_Y^2
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P14 = [EXP_AD_P2],16
+(p6) fma.s1 atan2_B1sq_Y = atan2_B1Y, atan2_B1Y, f0 // swap: B1sq_Y = B1Y^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ldfe atan2_P3 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_E = atan2_z2_Y, atan2_B1Y, atan2_Y // swap: E = Y + z2_Y*B1Y
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P13 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_E = atan2_z2_X, atan2_B1X, atan2_X // no swap: E = X + z2_X*B1X
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P2 = [EXP_AD_P1],16
+(p6) fma.s1 atan2_z = atan2_z1_Y, atan2_B1Y, f0 // swap: z = z1_Y*B1Y
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_P12 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_z = atan2_z1_X, atan2_B1X, f0 // no swap: z = z1_X*B1X
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ ldfe atan2_P1 = [EXP_AD_P1],16
+ fcmp.eq.s0 p14,p15=atan2_X,atan2_Y // Dummy for denorm and invalid
+ nop.i 999
+}
+{ .mlx
+ ldfe atan2_P22 = [EXP_AD_P2],16
+ movl rsig_near_one = 0x8000000000000001 // signif near 1.0
+;;
+}
+
+
+// p12 ==> x inf y inf
+// p13 ==> x inf y !inf
+{ .mmf
+ ldfe atan2_P0 = [EXP_AD_P1],16
+ ldfe atan2_pi_by_2 = [EXP_AD_P2],16
+(p10) fclass.m.unc p12,p13 = atan2_Y, 0x23 // x inf, test if y inf
+;;
+}
+
+{ .mfi
+ ldfe atan2_pi = [EXP_AD_P1],16
+(p6) fma.s1 atan2_w = atan2_wp_Y, atan2_B1sq_Y,f0 // swap: w = wp_Y*B1sq_Y
+ nop.i 999
+}
+{ .mfi
+ ldfe atan2_pi_by_4 = [EXP_AD_P2],16
+(p7) fma.s1 atan2_w = atan2_wp_X, atan2_B1sq_X,f0 // no swap: w = wp_X*B1sq_X
+ nop.i 999
+;;
+}
+
+{ .mfi
+ ldfe atan2_3pi_by_4 = [EXP_AD_P2],16
+(p11) fclass.m.unc p9,p0 = atan2_Y, 0x23 // x not inf, test if y inf
+ nop.i 999
+;;
+}
+
+{ .mfi
+ setf.sig atan2_sig_near_one = rsig_near_one
+(p12) fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x inf, y inf, test if x +inf
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2_gV = atan2_Y, atan2_z, atan2_X // swap: gV = X - Y*z
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ frcpa.s1 atan2_F,p0 = f1, atan2_E // F ~ 1/E
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fnma.s1 atan2_gV = atan2_X, atan2_z, atan2_Y // no swap: gV = Y - X*z
+ nop.i 999
+;;
+}
+
+// p13 ==> x inf y !inf
+{ .mfi
+ nop.m 999
+(p13) fcmp.gt.unc.s1 p14,p15 = atan2_X,f0 // x inf, y !inf, test if x +inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // +-pi/2 if x !inf, y inf
+(p9) br.ret.spnt b0 // exit if x not inf, y inf, result is +-pi/2
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V13 = atan2_w, atan2_P11, atan2_P10 // V13 = P10 + P11*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W11 = atan2_w, atan2_P21, atan2_P20 // W11 = P20 + P21*w
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V11 = atan2_w, atan2_P9, atan2_P8 // V11 = P8 + P9*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V12 = atan2_w, atan2_w, f0 // V12 = w^2
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V8 = atan2_w, atan2_P7 , atan2_P6 // V8 = P6 + P7*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W8 = atan2_w, atan2_P19, atan2_P18 // W8 = P18 + P19*w
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2_alpha = atan2_E, atan2_F, f1 // alpha = 1 - E*F
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2_alpha_1 = atan2_E, atan2_F, atan2_two // alpha_1 = 2 - E*F = 1 + alpha
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V7 = atan2_w, atan2_P5 , atan2_P4 // V7 = P4 + P5*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W7 = atan2_w, atan2_P17, atan2_P16 // W7 = P16 + P17*w
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V4 = atan2_w, atan2_P3 , atan2_P2 // V4 = P2 + P3*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W4 = atan2_w, atan2_P15, atan2_P14 // W4 = P14 + P15*w
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V3 = atan2_w, atan2_P1 , atan2_P0 // V3 = P0 + P1*w
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W3 = atan2_w, atan2_P13, atan2_P12 // W3 = P12 + P13*w
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V10 = atan2_V12, atan2_V13, atan2_V11 // V10 = V11 + w^2*V13 (P8..P11)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_gVF = atan2_gV, atan2_F, f0 // gVF = gV*F
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_alpha_sq = atan2_alpha, atan2_alpha, f0 // alpha^2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_Cp = atan2_alpha, atan2_alpha_1, f1 // Cp = 1 + alpha*alpha_1
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V9 = atan2_V12, atan2_V12, f0 // V9 = w^4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W10 = atan2_V12, atan2_P22 , atan2_W11 // W10 = W11 + w^2*P22 (P20..P22)
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V6 = atan2_V12, atan2_V8 , atan2_V7 // V6 = V7 + w^2*V8 (P4..P7)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W6 = atan2_V12, atan2_W8 , atan2_W7 // W6 = W7 + w^2*W8 (P16..P19)
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V2 = atan2_V12, atan2_V4 , atan2_V3 // V2 = V3 + w^2*V4 (P0..P3)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W2 = atan2_V12, atan2_W4 , atan2_W3 // W2 = W3 + w^2*W4 (P12..P15)
+ nop.i 999
+;;
+}
+
+// p8 ==> y 0 x?
+// p9 ==> y !0 x?
+{ .mfi
+ nop.m 999
+ fclass.m p8,p9 = atan2_Y, 0x07 // Test for y=0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_zcub = atan2_z, atan2_w, f0 // zcub = z*w
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_alpha_cub = atan2_alpha, atan2_alpha_sq, f0 // alpha^3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_C = atan2_gVF, atan2_Cp, f0 // C = gVF*Cp
+ nop.i 999
+;;
+}
+
+// p12 ==> y0 x0
+// p13 ==> y0 x!0
+{ .mfi
+ nop.m 999
+(p8) fclass.m.unc p12,p13 = atan2_X, 0x07 // y=0, test if x is 0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W12 = atan2_V9, atan2_V9, f0 // W12 = w^8 (raised to w^12 below)
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V5 = atan2_V9, atan2_V10, atan2_V6 // V5 = V6 + w^4*V10 (P4..P11)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W5 = atan2_V9, atan2_W10, atan2_W6 // W5 = W6 + w^4*W10 (P16..P22)
+ nop.i 999
+;;
+}
+
+
+// p9 ==> y!0 x0
+{ .mfi
+ nop.m 999
+(p9) fclass.m.unc p9,p0 = atan2_X, 0x07 // y not 0, test if x is 0
+ nop.i 999
+}
+// p10 ==> X +INF, Y +-INF
+{ .mfb
+ nop.m 999
+(p10) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_4, f0 // x=+inf, y=inf
+(p10) br.ret.spnt b0 // Exit for x=+inf, y=inf, result is +-pi/4
+;;
+}
+
+.pred.rel "mutex",p11,p14
+{ .mfi
+ nop.m 999
+(p14) fmerge.s f8 = atan2_sgnY, f0 // x=+inf, y !inf, result +-0
+ nop.i 999
+}
+// p11 ==> X -INF, Y +-INF
+{ .mfb
+ nop.m 999
+(p11) fma.d.s0 f8 = atan2_sgnY, atan2_3pi_by_4, f0 // x=-inf, y=inf
+(p11) br.ret.spnt b0 // Exit for x=-inf, y=inf, result is +-3pi/4
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p13) fcmp.gt.unc.s1 p10,p11 = atan2_X,f0 // x not 0, y=0, test if x>0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 atan2_d = atan2_alpha_cub, atan2_C, atan2_C // d = C + alpha^3*C
+(p14) br.ret.spnt b0 // Exit if x=+inf, y !inf, result +-0
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_W12 = atan2_V9, atan2_W12, f0 // W12 = w^4 * w^8 = w^12
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8 = atan2_sgnY, atan2_pi_by_2, f0 // x=0, y not 0
+(p9) br.ret.spnt b0 // Exit if x=0 and y not 0, result is +-pi/2
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_V1 = atan2_V9, atan2_V5, atan2_V2 // V1 = V2 + w^4*V5 (P0..P11)
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 atan2_W1 = atan2_V9, atan2_W5, atan2_W2 // W1 = W2 + w^4*W5 (P12..P22)
+(p12) br.spnt ATAN2_ERROR // Branch if x=0 and y=0
+;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) fmerge.s f8 = atan2_sgnY, f0 // +-0 if x>0, y=0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p11) fma.d.s0 f8 = atan2_sgnY, atan2_pi, f0 // +-pi if x<0, y=0
+(p13) br.ret.spnt b0 // Exit if x!0 and y=0
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_pd = atan2_P0, atan2_d, f0 // pd = P0*d
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_dsq = atan2_d, atan2_d, f0 // dsq = d^2
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fmerge.se atan2_near_one = f1, atan2_sig_near_one // Const ~1.0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_Pp = atan2_W12, atan2_W1, atan2_V1 // Pp = V1 + w^12*W1 = p(w)
+ nop.i 999
+;;
+}
+
+// p8 true if no swap and X positive
+// p9 true if no swap and X negative
+// both are false if swap is true
+{ .mfi
+ nop.m 999
+(p7) fcmp.ge.unc.s1 p8,p9 = atan2_X,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fma.d.s0 f8 = atan2_sgnY, atan2_pi, f0
+(p15) br.ret.spnt b0 // Exit if x=-inf, y !inf, result +-pi
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_sgn_pi_by_2 = atan2_pi_by_2, atan2_sgnY, f0 // sgnY*pi/2 (overwrites pi_by_2)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_A_lo = atan2_pd, atan2_dsq, atan2_d // A_lo = d + P0*d^3
+ nop.i 999
+;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_sgn_pi = atan2_pi, atan2_sgnY, f0 // sgnY*pi (overwrites pi)
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_A_hi = atan2_zcub, atan2_Pp, atan2_z // A_hi = z + zcub*p(w)
+ nop.i 999
+;;
+}
+
+
+// For |Y| <= |X| and X > 0, force inexact in case A_lo is zero
+{ .mfi
+ nop.m 999
+(p8) fmpy.s0 atan2_tmp = atan2_P22, atan2_P22
+ nop.i 999
+;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2_A = atan2_A_hi, f1, atan2_A_lo // A = A_hi + A_lo
+ nop.i 999
+}
+// For |Y| <= |X| and X > 0, result is A_hi + A_lo
+{ .mfi
+ nop.m 999
+(p8) fma.d.s0 f8 = atan2_A_hi, f1, atan2_A_lo
+ nop.i 999
+;;
+}
+
+.pred.rel "mutex",p6,p9
+// We perturb A by multiplying by 1.0+1ulp as we produce the result
+// in order to get symmetrically rounded results in directed rounding modes.
+// If we don't do this, there are a few cases where the trailing 11 bits of
+// the significand of the result, before converting to double, are zero. These
+// cases do not round symmetrically in round to +infinity or round to -infinity.
+// The perturbation also ensures that the inexact flag is set.
+// For |Y| > |X|, result is +- pi/2 - (A_hi + A_lo)
+{ .mfi
+ nop.m 999
+(p6) fnma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi_by_2
+ nop.i 999
+}
+// For |Y| <= |X|, and X < 0, result is +- pi + (A_hi + A_lo)
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8 = atan2_A, atan2_near_one, atan2_sgn_pi
+ br.ret.sptk b0
+;;
+}
+
+ATAN2_ERROR:
+// Here if x=0 and y=0
+{ .mfi
+ nop.m 999
+ fclass.m p10,p11 = atan2_X,0x05 // Test if x=+0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ mov atan2_GR_tag = 37 // error tag for atan2(+-0, +-0)
+(p10) fmerge.s f10 = atan2_sgnY, f0 // x=+0, y=0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fma.d.s0 f10 = atan2_sgnY, atan2_pi, f0 // x=-0, y=0
+ nop.i 999
+;;
+}
+GLOBAL_IEEE754_END(atan2)
+libm_alias_double_other (__atan2, atan2)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// (1) Save ar.pfs and gp, and open a 64-byte stack frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // old sp-32, which is new sp+32 once the frame exists
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2) Spill the first input and save the return address
+{ .mmi
+ stfd [GR_Parameter_Y] = f8,16 // store f8 (atan2_Y) at sp+32; pointer moves to sp+48
+ add GR_Parameter_X = 16,sp // GR_Parameter_X = sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3) Spill the second input and the preset result, then call the handler
+{ .mib
+ stfd [GR_Parameter_X] = f9 // store f9 (atan2_X) at sp+16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // GR_Parameter_RESULT = sp+48
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f10 // store f10 (set in ATAN2_ERROR) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to sp+32, where f8 was stored
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // recompute the result slot address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+// (4) Reload the result, tear down the frame and return
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atan2f.S b/sysdeps/ia64/fpu/e_atan2f.S
new file mode 100644
index 0000000000..c155154d67
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atan2f.S
@@ -0,0 +1,900 @@
+.file "atan2f.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//==============================================================
+// 06/01/00 Initial version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 08/17/00 Changed predicate register macro-usage to direct predicate
+// names due to an assembler bug.
+// 01/05/01 Fixed flag settings for denormal input.
+// 01/19/01 Added documentation
+// 01/30/01 Improved speed
+// 02/06/02 Corrected .section statement
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+
+// Description
+//=========================================
+// The atan2 function computes the principal value of the arc tangent of y/x using
+// the signs of both arguments to determine the quadrant of the return value.
+// A domain error may occur if both arguments are zero.
+
+// The atan2 function returns the arc tangent of y/x in the range [-pi,+pi] radians.
+
+//..
+//..Let (v,u) = (y,x) if |y| <= |x|, and (v,u) = (x,y) otherwise. Note that
+//..v and u can be negative. We state the relationship between atan2(y,x) and
+//..atan(v/u).
+//..
+//..Let swap = false if v = y, and swap = true if v = x.
+//..Define C according to the matrix
+//..
+//.. TABLE FOR C
+//.. x +ve x -ve
+//.. no swap (swap = false) sgn(y)*0 sgn(y)*pi
+//.. swap (swap = true ) sgn(y)*pi/2 sgn(y)*pi/2
+//..
+//.. atan2(y,x) = C + atan(v/u) if no swap
+//.. atan2(y,x) = C - atan(v/u) if swap
+//..
+//..This relationship is more efficient to compute as we accommodate signs in v and u
+//..saving the need to obtain the absolute value before computation can proceed.
+//..
+//..Suppose (v,u) = (y,x), we calculate atan(v/u) as follows:
+//..A = y * frcpa(x) (so A = (y/x)(1 - beta))
+//..atan(y/x) = atan(A) + atan( ((y/x)-A)/(1 + (y/x)A) ), the second term is
+//..a correction.
+//..atan(A) is approximated by a polynomial
+//..A + p1 A^3 + p2 A^5 + ... + p10 A^21,
+//..atan(G) is approximated as follows:
+//..Let G = (y - Ax)/(x + Ay), atan(G) can be approximated by G + g * p1
+//..where g is a limited precision approximation to G via g = (y - Ax)*frcpa(x + Ay).
+//..
+//..Suppose (v,u) = (x,y), we calculate atan(v/u) as follows:
+//..Z = x * frcpa(y) (so Z = (x/y)(1 - beta))
+//..atan(x/y) = atan(Z) + atan( ((x/y)-Z)/(1 + (x/y)Z) ), the second term is
+//..a correction.
+//..atan(Z) is approximated by a polynomial
+//..Z + p1 Z^3 + p2 Z^5 + ... + p10 Z^21,
+//..atan(T) is approximated as follows:
+//..Let T = (x - Zy)/(y + Zx), atan(T) can be approximated by T + t * p1
+//..where t is a limited precision approximation to T via t = (x - Zy)*frcpa(y + Zx).
+//..
+//..
+//..A = y * frcpa(x)
+//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
+//..
+//..This polynomial is computed as follows:
+//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
+//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
+//..
+//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
+//..poly_A1 = poly_A2 + A4 * poly_A1
+//..poly_A1 = poly_A3 + A4 * poly_A1
+//..
+//..poly_A4 = p1 * A
+//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
+//..poly_A5 = p2 + Asq * poly_A5
+//..poly_A4 = poly_A4 + A5 * poly_A5
+//..
+//..atan_A = poly_A4 + A11 * poly_A1
+//..
+//..atan(G) is approximated as follows:
+//..G_numer = y - A*x, G_denom = x + A*y
+//..H1 = frcpa(G_denom)
+//..H_beta = 1 - H1 * G_denom
+//..H2 = H1 + H1 * H_beta
+//..H_beta2 = H_beta*H_beta
+//..H3 = H2 + H2*H_beta2
+//..g = H1 * G_numer; gsq = g*g; atan_G = g*p1, atan_G = atan_G*gsq
+//..atan_G = G_numer*H3 + atan_G
+//..
+//..
+//..A = y * frcpa(x)
+//..atan(A) ~=~ A + p1 A^3 + ... + P10 A^21
+//..
+//..This polynomial is computed as follows:
+//..Asq = A*A; Acub = A*Asq, A4 = Asq*Asq
+//..A5 = Asq*Acub, A6 = Asq*A4; A11 = A5 * A6
+//..
+//..poly_A1 = p9 + Asq*p10, poly_A2 = p7 + Asq*p8, poly_A3 = p5 + Asq*p6
+//..poly_A1 = poly_A2 + A4 * poly_A1
+//..poly_A1 = poly_A3 + A4 * poly_A1
+//..
+//..poly_A4 = p1 * A
+//..poly_A5 = p3 + Asq * p4, poly_A4 = A + Asq*poly_A4
+//..poly_A5 = p2 + Asq * poly_A5
+//..poly_A4 = poly_A4 + A5 * poly_A5
+//..
+//..atan_A = poly_A4 + A11 * poly_A1
+//..
+//..
+//..====================================================================
+//.. COEFFICIENTS USED IN THE COMPUTATION
+//..====================================================================
+
+//coef_pj, j = 1,2,...,10; atan(A) ~=~ A + p1 A^3 + p2 A^5 + ... + p10 A^21
+//
+// coef_p1 = -.3333332707155439167401311806315789E+00
+// coef_p1 in dbl = BFD5 5555 1219 1621
+//
+// coef_p2 = .1999967670926658391827857030875748E+00
+// coef_p2 in dbl = 3FC9 997E 7AFB FF4E
+//
+// coef_p3 = -.1427989384500152360161563301087296E+00
+// coef_p3 in dbl = BFC2 473C 5145 EE38
+//
+// coef_p4 = .1105852823460720770079031213661163E+00
+// coef_p4 in dbl = 3FBC 4F51 2B18 65F5
+//
+// coef_p5 = -.8811839915595312348625710228448363E-01
+// coef_p5 in dbl = BFB6 8EED 6A8C FA32
+//
+// coef_p6 = .6742329836955067042153645159059714E-01
+// coef_p6 in dbl = 3FB1 42A7 3D7C 54E3
+//
+// coef_p7 = -.4468571068774672908561591262231909E-01
+// coef_p7 in dbl = BFA6 E10B A401 393F
+//
+// coef_p8 = .2252333246746511135532726960586493E-01
+// coef_p8 in dbl = 3F97 105B 4160 F86B
+//
+// coef_p9 = -.7303884867007574742501716845542314E-02
+// coef_p9 in dbl = BF7D EAAD AA33 6451
+//
+// coef_p10 = .1109686868355312093949039454619058E-02
+// coef_p10 in dbl = 3F52 2E5D 33BC 9BAA
+//
+
+// Special values
+//==============================================================
+// Y x Result
+// +number +inf +0
+// -number +inf -0
+// +number -inf +pi
+// -number -inf -pi
+//
+// +inf +number +pi/2
+// -inf +number -pi/2
+// +inf -number +pi/2
+// -inf -number -pi/2
+//
+// +inf +inf +pi/4
+// -inf +inf -pi/4
+// +inf -inf +3pi/4
+// -inf -inf -3pi/4
+//
+// +1 +1 +pi/4
+// -1 +1 -pi/4
+// +1 -1 +3pi/4
+// -1 -1 -3pi/4
+//
+// +number +0 +pi/2 // does not raise DBZ
+// -number +0 -pi/2 // does not raise DBZ
+// +number -0 +pi/2 // does not raise DBZ
+// -number -0 -pi/2 // does not raise DBZ
+//
+// +0 +number +0
+// -0 +number -0
+// +0 -number +pi
+// -0 -number -pi
+//
+// +0 +0 +0 // does not raise invalid
+// -0 +0 -0 // does not raise invalid
+// +0 -0 +pi // does not raise invalid
+// -0 -0 -pi // does not raise invalid
+//
+// Nan anything quiet Y
+// anything NaN quiet X
+
+// atan2(+-0/+-0) sets double error tag to 37
+// atan2f(+-0/+-0) sets single error tag to 38
+// These are domain errors.
+
+
+//
+// Assembly macros
+//=========================================
+
+
+// integer registers (atan2f's alloc 1,5,4,0: r32 input, r33-r37 locals, r38-r41 outputs)
+atan2f_GR_Addr_1 = r33
+atan2f_GR_Addr_2 = r34
+GR_SAVE_B0 = r35
+// Locals that hold ar.pfs and gp across the call to __libm_error_support
+GR_SAVE_PFS = r36
+GR_SAVE_GP = r37
+// Output registers: stack addresses of x, y and result, plus the error tag
+GR_Parameter_X = r38
+GR_Parameter_Y = r39
+GR_Parameter_RESULT = r40
+GR_Parameter_TAG = r41
+
+// floating point registers
+atan2f_coef_p1 = f32 // p1..p10: polynomial coefficients loaded from the coef tables
+atan2f_coef_p10 = f33
+atan2f_coef_p7 = f34
+atan2f_coef_p6 = f35
+
+atan2f_coef_p3 = f36
+atan2f_coef_p2 = f37
+atan2f_coef_p9 = f38
+atan2f_coef_p8 = f39
+atan2f_coef_p5 = f40
+
+atan2f_coef_p4 = f41
+atan2f_const_piby2 = f42 // pi/2, pi, pi/4, 3pi/4 come from atan2f_coef_table2
+atan2f_const_pi = f43
+atan2f_const_piby4 = f44
+atan2f_const_3piby4 = f45
+
+atan2f_xsq = f46 // x*x
+atan2f_ysq = f47 // y*y
+atan2f_xy = f48 // x*y
+atan2f_const_1 = f49 // sgn(y)*0 when x>=0, sgn(y)*1 when x<0
+atan2f_sgn_Y = f50 // 1.0 carrying the sign of y
+
+atan2f_Z0 = f51 // frcpa approximation to 1/y
+atan2f_A0 = f52 // frcpa approximation to 1/x
+atan2f_Z = f53 // x*Z0
+atan2f_A = f54 // y*A0
+atan2f_C = f55 // sgn(y)*pi/2 (swap) or const_1*pi (no swap)
+
+atan2f_U = f56 // -Z (swap) or A (no swap)
+atan2f_Usq = f57 // U^2
+atan2f_U4 = f58 // U^4
+atan2f_U6 = f59 // U^6
+atan2f_U8 = f60 // U^8
+
+atan2f_poly_u109 = f61 // p9 + Usq*p10
+atan2f_poly_u87 = f62 // p7 + Usq*p8
+atan2f_poly_u65 = f63 // p5 + Usq*p6
+atan2f_poly_u43 = f64 // p3 + Usq*p4
+atan2f_poly_u21 = f65 // p1 + Usq*p2
+
+atan2f_poly_u10to7 = f66 // poly_u87 + U4*poly_u109
+atan2f_poly_u6to3 = f67 // poly_u43 + U4*poly_u65
+atan2f_poly_u10to3 = f68 // poly_u6to3 + U8*poly_u10to7
+atan2f_poly_u10to0 = f69 // poly_u210 + U6*poly_u10to3
+atan2f_poly_u210 = f70 // 1 + Usq*poly_u21
+
+atan2f_T_numer = f71 // x - Z0*x*y
+atan2f_T_denom = f72 // y + Z0*x*x
+atan2f_G_numer = f73 // y - A0*x*y
+atan2f_G_denom = f74 // x + A0*y*y
+atan2f_p1rnum = f75 // p1 * R_numer
+
+atan2f_R_denom = f76 // T_denom (swap) or G_denom (no swap)
+atan2f_R_numer = f77 // -T_numer (swap) or G_numer (no swap)
+atan2f_pR = f78 // p1rnum*Q1
+atan2f_pRC = f79 // C + rsq*pR
+atan2f_pQRC = f80 // pRC + R_numer*Q3
+
+atan2f_Q1 = f81 // frcpa approximation to 1/R_denom
+atan2f_Q_beta = f82 // 1 - Q1*R_denom
+atan2f_Q2 = f83 // Q1 + Q1*Q_beta
+atan2f_Q_beta2 = f84 // Q_beta^2
+atan2f_Q3 = f85 // Q2 + Q2*Q_beta2
+
+atan2f_r = f86 // Q1*R_numer
+atan2f_rsq = f87 // r*r
+atan2f_poly_atan_U = f88 // not referenced by the atan2f code that follows
+
+
+// predicate registers
+//atan2f_Pred_Swap = p6 // |y| > |x|
+//atan2f_Pred_noSwap = p7 // |y| <= |x|
+//atan2f_Pred_Xpos = p8 // x >= 0
+//atan2f_Pred_Xneg = p9 // x < 0
+
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(atan2f_coef_table1)
+data8 0xBFD5555512191621 // p1   (entries are in ldfpd pair-load order:
+data8 0x3F522E5D33BC9BAA // p10   (p1,p10), (p7,p6), (p3,p2))
+data8 0xBFA6E10BA401393F // p7
+data8 0x3FB142A73D7C54E3 // p6
+data8 0xBFC2473C5145EE38 // p3
+data8 0x3FC9997E7AFBFF4E // p2
+LOCAL_OBJECT_END(atan2f_coef_table1)
+// atan2f addresses table2 as table1+0x30: keep table1 at six data8 and table2 right after it
+LOCAL_OBJECT_START(atan2f_coef_table2)
+data8 0xBF7DEAADAA336451 // p9   (pair-load order: (p9,p8), (p5,p4),
+data8 0x3F97105B4160F86B // p8    (pi/2,pi), (pi/4,3pi/4))
+data8 0xBFB68EED6A8CFA32 // p5
+data8 0x3FBC4F512B1865F5 // p4
+data8 0x3ff921fb54442d18 // pi/2
+data8 0x400921fb54442d18 // pi
+data8 0x3fe921fb54442d18 // pi/4
+data8 0x4002d97c7f3321d2 // 3pi/4
+LOCAL_OBJECT_END(atan2f_coef_table2)
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(atan2f)
+// In: f8 = y, f9 = x. Out: f8 = atan2f(y,x); the final fma.s rounds to single.
+{ .mfi
+ alloc r32 = ar.pfs,1,5,4,0 // 1 input, 5 locals (r33-r37), 4 outputs (r38-r41)
+ frcpa.s1 atan2f_Z0,p0 = f1,f8 // Approx to 1/y
+ nop.i 999
+}
+{ .mfi
+ addl atan2f_GR_Addr_1 = @ltoff(atan2f_coef_table1),gp // gp-relative linkage-table slot
+ fma.s1 atan2f_xsq = f9,f9,f0 // x*x
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ld8 atan2f_GR_Addr_1 = [atan2f_GR_Addr_1] // address of atan2f_coef_table1
+ frcpa.s1 atan2f_A0,p0 = f1,f9 // Approx to 1/x
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_ysq = f8,f8,f0 // y*y
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.ge.s1 p8,p9 = f9,f0 // Set p8 if x>=0, p9 if x<0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_xy = f9,f8,f0 // x*y
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ add atan2f_GR_Addr_2 = 0x30, atan2f_GR_Addr_1 // table1 + 48 bytes (its six data8 entries)
+ fmerge.s atan2f_sgn_Y = f8,f1 // 1.0 carrying the sign of y
+ nop.i 999 ;;
+}
+
+{ .mmf
+ ldfpd atan2f_coef_p1,atan2f_coef_p10 = [atan2f_GR_Addr_1],16
+ ldfpd atan2f_coef_p9,atan2f_coef_p8 = [atan2f_GR_Addr_2],16
+ fclass.m p10,p0 = f9,0xe7 // Test x @inf|@snan|@qnan|@zero
+}
+;;
+
+{ .mfi
+ ldfpd atan2f_coef_p7,atan2f_coef_p6 = [atan2f_GR_Addr_1],16
+ fma.s1 atan2f_T_denom = atan2f_Z0,atan2f_xsq,f8 // y + Z0*x*x
+ nop.i 999
+}
+{ .mfi
+ ldfpd atan2f_coef_p5,atan2f_coef_p4 = [atan2f_GR_Addr_2],16
+ fma.s1 atan2f_Z = atan2f_Z0,f9,f0 // Z = x*Z0, approx x/y
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ ldfpd atan2f_coef_p3,atan2f_coef_p2 = [atan2f_GR_Addr_1],16
+ fma.s1 atan2f_G_denom = atan2f_A0,atan2f_ysq,f9 // x + A0*y*y
+ nop.i 999
+}
+{ .mfi
+ ldfpd atan2f_const_piby2,atan2f_const_pi = [atan2f_GR_Addr_2],16
+ fma.s1 atan2f_A = atan2f_A0,f8,f0 // A = y*A0, approx y/x
+ nop.i 999 ;;
+}
+
+{ .mfi
+ ldfpd atan2f_const_piby4,atan2f_const_3piby4 = [atan2f_GR_Addr_2]
+ fclass.m p11,p0 = f8,0xe7 // Test y @inf|@snan|@qnan|@zero
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 atan2f_T_numer = atan2f_Z0,atan2f_xy,f9 // x - Z0*x*y
+(p10) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on x nan,inf,zero
+}
+
+
+// p6 if |y|>|x|, p7 if |x|>=|y| , use xsq and ysq for test
+{ .mfi
+ nop.m 999
+ fcmp.gt.s1 p6,p7 = atan2f_ysq,atan2f_xsq
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 atan2f_G_numer = atan2f_A0,atan2f_xy,f8 // y - A0*x*y
+(p11) br.cond.spnt ATAN2F_XY_INF_NAN_ZERO ;; // Branch on y nan,inf,zero
+}
+
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f0,f0 // x>=0: sgn(y)*0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 atan2f_const_1 = atan2f_sgn_Y,f1,f0 // x<0: sgn(y)*1
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2f_U = atan2f_Z,f1,f0 // swap: U = -Z
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan2f_Usq = atan2f_Z,atan2f_Z,f0 // swap: Usq = Z*Z
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_U = atan2f_A,f1,f0 // no swap: U = A
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_Usq = atan2f_A,atan2f_A,f0 // no swap: Usq = A*A
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_T_denom // swap: Q1 approx 1/T_denom
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan2f_R_denom = atan2f_T_denom,f1,f0 // swap: R_denom = T_denom
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p7) frcpa.s1 atan2f_Q1,p0 = f1,atan2f_G_denom // no swap: Q1 approx 1/G_denom
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_R_denom = atan2f_G_denom,f1,f0 // no swap: R_denom = G_denom
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2f_R_numer = atan2f_T_numer,f1,f0 // swap: R_numer = -T_numer
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_R_numer = atan2f_G_numer,f1,f0 // no swap: R_numer = G_numer
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+(p6) fnma.s1 atan2f_p1rnum = atan2f_T_numer,atan2f_coef_p1,f0 // swap: -T_numer*p1
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_p1rnum = atan2f_G_numer,atan2f_coef_p1,f0 // no swap: G_numer*p1
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_U4 = atan2f_Usq,atan2f_Usq,f0 // U^4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u109 = atan2f_Usq,atan2f_coef_p10,atan2f_coef_p9 // p9 + Usq*p10
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u87 = atan2f_Usq,atan2f_coef_p8,atan2f_coef_p7 // p7 + Usq*p8
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u65 = atan2f_Usq,atan2f_coef_p6,atan2f_coef_p5 // p5 + Usq*p6
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u43 = atan2f_Usq,atan2f_coef_p4,atan2f_coef_p3 // p3 + Usq*p4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnma.s1 atan2f_Q_beta = atan2f_Q1,atan2f_R_denom,f1 // 1 - Q1*R_denom
+ nop.i 999 ;;
+}
+
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u21 = atan2f_Usq,atan2f_coef_p2,atan2f_coef_p1 // p1 + Usq*p2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_r = atan2f_Q1,atan2f_R_numer,f0 // r = Q1*R_numer
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 atan2f_C = atan2f_sgn_Y,atan2f_const_piby2,f0 // swap: C = sgn(y)*pi/2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 atan2f_C = atan2f_const_1,atan2f_const_pi,f0 // no swap: C = const_1*pi
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_U6 = atan2f_U4,atan2f_Usq,f0 // U^6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_U8 = atan2f_U4,atan2f_U4,f0 // U^8
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u10to7 = atan2f_U4,atan2f_poly_u109,atan2f_poly_u87 // poly_u87 + U4*poly_u109
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_pR = atan2f_p1rnum,atan2f_Q1,f0 // pR = p1rnum*Q1
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u6to3 = atan2f_U4,atan2f_poly_u65,atan2f_poly_u43 // poly_u43 + U4*poly_u65
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_Q2 = atan2f_Q1,atan2f_Q_beta,atan2f_Q1 // Q2 = Q1 + Q1*Q_beta
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_Q_beta2 = atan2f_Q_beta,atan2f_Q_beta,f0 // Q_beta^2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_rsq = atan2f_r,atan2f_r,f0 // r*r
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u210 = atan2f_Usq,atan2f_poly_u21,f1 // 1 + Usq*poly_u21
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p8,p0 = f8,f9 // Dummy op to set flag on denormal inputs
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u10to3 = atan2f_U8,atan2f_poly_u10to7,atan2f_poly_u6to3 // poly_u6to3 + U8*poly_u10to7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_Q3 = atan2f_Q2,atan2f_Q_beta2,atan2f_Q2 // Q3 = Q2 + Q2*Q_beta2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_pRC = atan2f_rsq,atan2f_pR,atan2f_C // pRC = C + rsq*pR
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_poly_u10to0 = atan2f_U6,atan2f_poly_u10to3,atan2f_poly_u210 // poly_u210 + U6*poly_u10to3
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fma.s1 atan2f_pQRC = atan2f_R_numer,atan2f_Q3,atan2f_pRC // pQRC = pRC + R_numer*Q3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = atan2f_U,atan2f_poly_u10to0,atan2f_pQRC // result = pQRC + U*poly_u10to0
+ br.ret.sptk b0 ;;
+}
+
+
+
+ATAN2F_XY_INF_NAN_ZERO:
+// Reached when x or y is NaN, infinity or zero (the 0xe7 fclass tests in the main path).
+{ .mfi
+ nop.m 999
+ fclass.m p10,p0 = f8,0xc3 // Is y nan
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p12,p0 = f9,0xc3 // Is x nan
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p6,p0 = f9,0x21 // Is x +inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p10) fma.s.s0 f8 = f9,f8,f0 // Result quietized y if y is nan
+(p10) br.ret.spnt b0 // Exit if y is nan
+}
+;;
+
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p7,p8 = f8,0x23 // x +inf, is y inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p12) fnorm.s.s0 f8 = f9 // Result quietized x if x is nan, y not nan
+(p12) br.ret.spnt b0 // Exit if x is nan, y not nan
+}
+;;
+
+// Here if x or y inf, or x or y zero
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p15,p0 = f8,f9 // Dummy op to set flag on denormal inputs
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p11,p12 = f9,0x22 // Is x -inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p7) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby4,f0 // Result +-pi/4
+(p7) br.ret.spnt b0 // Exit if x +inf and y inf
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p8) fmerge.s f8 = f8,f0 // If x +inf and y not inf, result +-0
+(p8) br.ret.spnt b0 // Exit if x +inf and y not inf
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fclass.m.unc p13,p0 = f8,0x23 // x not -inf, is y inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p11) fclass.m.unc p14,p15 = f8,0x23 // x -inf, is y inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p6,p7 = f9,0x7 // Is x zero
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p13) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // Result +-pi/2
+(p13) br.ret.spnt b0 // Exit if x not -inf and y inf
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_3piby4,f0 // Result +-3pi/4
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // Result +-pi
+(p11) br.ret.spnt b0 // Exit if x -inf
+}
+;;
+
+// Here if x or y zero; every NaN and infinity case has already returned above
+{ .mfi
+ nop.m 999
+(p7) fclass.m.unc p8,p9 = f9,0x19 // x not zero, y zero, is x > zero
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p10,p11 = f8,0x7 // x zero, is y zero
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fmerge.s f8 = f8, f0 // x > zero and y zero, result is +-zero
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_pi,f0 // x < 0, y 0, result +-pi
+(p10) br.cond.spnt __libm_error_region // Branch if x zero and y zero
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fma.s.s0 f8 = atan2f_sgn_Y, atan2f_const_piby2,f0 // x zero, y not zero
+ br.ret.sptk b0 // Final special case exit
+}
+;;
+
+
+GLOBAL_IEEE754_END(atan2f)
+libm_alias_float_other (__atan2, atan2)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+ mov GR_Parameter_TAG = 38 // error tag for atan2f(+-0,+-0)
+ fclass.m p10,p11 = f9,0x5 // @zero | @pos
+;;
+(p10) fmerge.s f10 = f8, f0 // x=+0: default result is zero with the sign of y
+(p11) fma.s.s0 f10 = atan2f_sgn_Y, atan2f_const_pi,f0 // otherwise: sgn(y)*pi
+;;
+// Frame setup: save ar.pfs and gp, open a 64-byte frame
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32 = new sp+32
+ nop.f 999
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+}
+;;
+// Spill f9 (x) to sp+32; the post-increment leaves GR_Parameter_Y at sp+48
+{ .mmi
+ stfs [GR_Parameter_Y] = f9,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+}
+;;
+// NOTE(review): here f9 goes to the Parameter 2 slot and f8 to Parameter 1, while the error
+// region in e_atan2.S stores f8 as Parameter 2 and f9 as Parameter 1 - confirm intended order.
+.body
+{ .mib
+ stfs [GR_Parameter_X] = f8 // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f10 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 slot (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+}
+;;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // re-derive the result address after the call
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+}
+;;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+}
+;;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atan2l.c b/sysdeps/ia64/fpu/e_atan2l.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atan2l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_atanh.S b/sysdeps/ia64/fpu/e_atanh.S
new file mode 100644
index 0000000000..3df73247f5
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atanh.S
@@ -0,0 +1,1071 @@
+.file "atanh.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// ==============================================================
+// History
+// ==============================================================
+// 05/03/01 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 05/26/03 Improved performance, fixed to handle unorms
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+// ==============================================================
+// double atanh(double)
+//
+// Overview of operation
+// ==============================================================
+//
+// There are 7 paths:
+// 1. x = +/-0.0
+// Return atanh(x) = +/-0.0
+//
+// 2. 0.0 < |x| < 1/4
+// Return atanh(x) = Po2l(x),
+// where Po2l(x) = (((((((((C9*x^2 + C8)*x^2 + C7)*x^2 + C6)*x^2 +
+// C5)*x^2 + C4)*x^2 + C3)*x^2 + C2)*x^2 + C1)* x^2 + C0)*x^3 + x
+// 3. 1/4 <= |x| < 1
+// Return atanh(x) = sign(x) * log((1 + |x|)/(1 - |x|)) / 2
+// To compute (1 + |x|)/(1 - |x|) a modified Newton-Raphson method is used
+// (3 iterations).
+// For the algorithm description of the log function, see below.
+//
+// 4. |x| = 1
+// Return atanh(x) = sign(x) * +INF
+//
+// 5. 1 < |x| <= +INF
+// Return atanh(x) = QNaN
+//
+// 6. x = [S,Q]NaN
+// Return atanh(x) = QNaN
+//
+// 7. x = denormal
+// Return atanh(x) = x
+//
+//==============================================================
+// Algorithm Description for log(x) function
+// Below we are using the fact that inequality x - 1.0 > 2^(-6) is always true
+// for this atanh implementation
+//
+// Consider x = 2^N 1.f1 f2 f3 f4...f63
+// Log(x) = log(x * frcpa(x) / frcpa(x))
+// = log(x * frcpa(x)) + log(1/frcpa(x))
+// = log(x * frcpa(x)) - log(frcpa(x))
+//
+// frcpa(x) = 2^-N * frcpa(1.f1 f2 ... f63)
+//
+// -log(frcpa(x)) = -log(C)
+// = -log(2^-N) - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = N*log2 - log(frcpa(1.f1 f2 ... f63))
+//
+//
+// Log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+//
+// Log(x) = N*log2 + log(1./frcpa(1.f1 f2 ... f63)) + log(x * frcpa(x))
+// Log(x) = N*log2 + T + log(frcpa(x) x)
+//
+// Log(x) = N*log2 + T + log(C * x)
+//
+// C * x = 1 + r
+//
+// Log(x) = N*log2 + T + log(1 + r)
+// Log(x) = N*log2 + T + Series(r - r^2/2 + r^3/3 - r^4/4 + ...)
+//
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
+//
+// Implementation
+//==============================================================
+// C = frcpa(x)
+// r = C * x - 1
+//
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4 + P4*r^5 + P5*r^6
+//
+// x = f * 2^N where f is 1.f_1f_2f_3...f_63
+// Nfloat = float(n) where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 16
+// get the dxt table entry at index + offset = T
+//
+// result = (T + Nfloat * log(2)) + rseries
+//
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+// y_k = frcpa(x_k)
+// log(1/y_k) in quad and round to double-extended
+//
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f32 -> f77
+
+// General registers used:
+// r14 -> r27, r33 -> r39
+
+// Predicate registers used:
+// p6 -> p14
+
+// p10, p11 to indicate whether the argument is positive or negative
+// p12 to filter out case when x = [Q,S]NaN or +/-0
+// p13 to filter out case when x = denormal
+// p6, p7 to filter out case when |x| >= 1
+// p8 to filter out case when |x| < 1/4
+
+// Assembly macros
+//==============================================================
+Data2Ptr = r14
+Data3Ptr = r15
+RcpTablePtr = r16
+rExpbMask = r17
+rBias = r18
+rNearZeroBound = r19
+rArgSExpb = r20
+rArgExpb = r21
+rSExpb = r22
+rExpb = r23
+rSig = r24
+rN = r25
+rInd = r26
+DataPtr = r27
+
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_SAVE_PFS = r35
+
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+atanh_GR_tag = r39
+
+//==============================================================
+fAbsX = f32
+fOneMx = f33
+fOnePx = f34
+fY = f35
+fR = f36
+fR2 = f37
+fR3 = f38
+fRcp = f39
+fY4Rcp = f40
+fRcp0 = f41
+fRcp0n = f42
+fRcp1 = f43
+fRcp2 = f44
+fRcp3 = f45
+fN4Cvt = f46
+fN = f47
+fY2 = f48
+fLog2 = f49
+fLogT = f50
+fLogT_N = f51
+fX2 = f52
+fX3 = f53
+fX4 = f54
+fX8 = f55
+fP0 = f56
+fP5 = f57
+fP4 = f58
+fP3 = f59
+fP2 = f60
+fP1 = f61
+fNormX = f62
+fC9 = f63
+fC8 = f64
+fC7 = f65
+fC6 = f66
+fC5 = f67
+fC4 = f68
+fC3 = f69
+fC2 = f70
+fC1 = f71
+fC0 = f72
+fP98 = f73
+fP76 = f74
+fP54 = f75
+fP32 = f76
+fP10 = f77
+
+// Data tables
+//==============================================================
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(atanh_data)
+data8 0xBFC5555DA7212371 // P5 (double bit pattern, approx -1/6)
+data8 0x3FC999A19EEF5826 // P4 (approx 1/5)
+data8 0xBFCFFFFFFFFEF009 // P3 (approx -1/4)
+data8 0x3FD555555554ECB2 // P2 (approx 1/3)
+data8 0xBFE0000000000000 // P1 = -0.5
+data8 0x0000000000000000 // pad
+data8 0xb17217f7d1cf79ac , 0x00003ffd // 0.5*log(2) as 64-bit significand, then sign/exponent
+data8 0x0000000000000000 , 0x00000000 // pad to eliminate bank conflicts
+LOCAL_OBJECT_END(atanh_data)
+
+LOCAL_OBJECT_START(atanh_data_2)
+data8 0x8649FB89D3AD51FB , 0x00003FFB // C9 (each entry: 64-bit significand, then sign/exponent)
+data8 0xCC10AABEF160077A , 0x00003FFA // C8
+data8 0xF1EDB99AC0819CE2 , 0x00003FFA // C7
+data8 0x8881E53A809AD24D , 0x00003FFB // C6
+data8 0x9D8A116EF212F271 , 0x00003FFB // C5
+data8 0xBA2E8A6D1D756453 , 0x00003FFB // C4
+data8 0xE38E38E7A0945692 , 0x00003FFB // C3 (approx 1/9)
+data8 0x924924924536891A , 0x00003FFC // C2 (approx 1/7)
+data8 0xCCCCCCCCCCD08D51 , 0x00003FFC // C1 (approx 1/5)
+data8 0xAAAAAAAAAAAAAA0C , 0x00003FFD // C0 (approx 1/3)
+LOCAL_OBJECT_END(atanh_data_2)
+
+
+LOCAL_OBJECT_START(atanh_data_3)
+data8 0x80200aaeac44ef38 , 0x00003ff5 // log(1/frcpa(1+0/2^-8))/2
+//
+data8 0xc09090a2c35aa070 , 0x00003ff6 // log(1/frcpa(1+1/2^-8))/2
+data8 0xa0c94fcb41977c75 , 0x00003ff7 // log(1/frcpa(1+2/2^-8))/2
+data8 0xe18b9c263af83301 , 0x00003ff7 // log(1/frcpa(1+3/2^-8))/2
+data8 0x8d35c8d6399c30ea , 0x00003ff8 // log(1/frcpa(1+4/2^-8))/2
+data8 0xadd4d2ecd601cbb8 , 0x00003ff8 // log(1/frcpa(1+5/2^-8))/2
+//
+data8 0xce95403a192f9f01 , 0x00003ff8 // log(1/frcpa(1+6/2^-8))/2
+data8 0xeb59392cbcc01096 , 0x00003ff8 // log(1/frcpa(1+7/2^-8))/2
+data8 0x862c7d0cefd54c5d , 0x00003ff9 // log(1/frcpa(1+8/2^-8))/2
+data8 0x94aa63c65e70d499 , 0x00003ff9 // log(1/frcpa(1+9/2^-8))/2
+data8 0xa54a696d4b62b382 , 0x00003ff9 // log(1/frcpa(1+10/2^-8))/2
+//
+data8 0xb3e4a796a5dac208 , 0x00003ff9 // log(1/frcpa(1+11/2^-8))/2
+data8 0xc28c45b1878340a9 , 0x00003ff9 // log(1/frcpa(1+12/2^-8))/2
+data8 0xd35c55f39d7a6235 , 0x00003ff9 // log(1/frcpa(1+13/2^-8))/2
+data8 0xe220f037b954f1f5 , 0x00003ff9 // log(1/frcpa(1+14/2^-8))/2
+data8 0xf0f3389b036834f3 , 0x00003ff9 // log(1/frcpa(1+15/2^-8))/2
+//
+data8 0xffd3488d5c980465 , 0x00003ff9 // log(1/frcpa(1+16/2^-8))/2
+data8 0x87609ce2ed300490 , 0x00003ffa // log(1/frcpa(1+17/2^-8))/2
+data8 0x8ede9321e8c85927 , 0x00003ffa // log(1/frcpa(1+18/2^-8))/2
+data8 0x96639427f2f8e2f4 , 0x00003ffa // log(1/frcpa(1+19/2^-8))/2
+data8 0x9defad3e8f73217b , 0x00003ffa // log(1/frcpa(1+20/2^-8))/2
+//
+data8 0xa582ebd50097029c , 0x00003ffa // log(1/frcpa(1+21/2^-8))/2
+data8 0xac06dbe75ab80fee , 0x00003ffa // log(1/frcpa(1+22/2^-8))/2
+data8 0xb3a78449b2d3ccca , 0x00003ffa // log(1/frcpa(1+23/2^-8))/2
+data8 0xbb4f79635ab46bb2 , 0x00003ffa // log(1/frcpa(1+24/2^-8))/2
+data8 0xc2fec93a83523f3f , 0x00003ffa // log(1/frcpa(1+25/2^-8))/2
+//
+data8 0xc99af2eaca4c4571 , 0x00003ffa // log(1/frcpa(1+26/2^-8))/2
+data8 0xd1581106472fa653 , 0x00003ffa // log(1/frcpa(1+27/2^-8))/2
+data8 0xd8002560d4355f2e , 0x00003ffa // log(1/frcpa(1+28/2^-8))/2
+data8 0xdfcb43b4fe508632 , 0x00003ffa // log(1/frcpa(1+29/2^-8))/2
+data8 0xe67f6dff709d4119 , 0x00003ffa // log(1/frcpa(1+30/2^-8))/2
+//
+data8 0xed393b1c22351280 , 0x00003ffa // log(1/frcpa(1+31/2^-8))/2
+data8 0xf5192bff087bcc35 , 0x00003ffa // log(1/frcpa(1+32/2^-8))/2
+data8 0xfbdf4ff6dfef2fa3 , 0x00003ffa // log(1/frcpa(1+33/2^-8))/2
+data8 0x81559a97f92f9cc7 , 0x00003ffb // log(1/frcpa(1+34/2^-8))/2
+data8 0x84be72bce90266e8 , 0x00003ffb // log(1/frcpa(1+35/2^-8))/2
+//
+data8 0x88bc74113f23def2 , 0x00003ffb // log(1/frcpa(1+36/2^-8))/2
+data8 0x8c2ba3edf6799d11 , 0x00003ffb // log(1/frcpa(1+37/2^-8))/2
+data8 0x8f9dc92f92ea08b1 , 0x00003ffb // log(1/frcpa(1+38/2^-8))/2
+data8 0x9312e8f36efab5a7 , 0x00003ffb // log(1/frcpa(1+39/2^-8))/2
+data8 0x968b08643409ceb6 , 0x00003ffb // log(1/frcpa(1+40/2^-8))/2
+//
+data8 0x9a062cba08a1708c , 0x00003ffb // log(1/frcpa(1+41/2^-8))/2
+data8 0x9d845b3abf95485c , 0x00003ffb // log(1/frcpa(1+42/2^-8))/2
+data8 0xa06fd841bc001bb4 , 0x00003ffb // log(1/frcpa(1+43/2^-8))/2
+data8 0xa3f3a74652fbe0db , 0x00003ffb // log(1/frcpa(1+44/2^-8))/2
+data8 0xa77a8fb2336f20f5 , 0x00003ffb // log(1/frcpa(1+45/2^-8))/2
+//
+data8 0xab0497015d28b0a0 , 0x00003ffb // log(1/frcpa(1+46/2^-8))/2
+data8 0xae91c2be6ba6a615 , 0x00003ffb // log(1/frcpa(1+47/2^-8))/2
+data8 0xb189d1b99aebb20b , 0x00003ffb // log(1/frcpa(1+48/2^-8))/2
+data8 0xb51cced5de9c1b2c , 0x00003ffb // log(1/frcpa(1+49/2^-8))/2
+data8 0xb819bee9e720d42f , 0x00003ffb // log(1/frcpa(1+50/2^-8))/2
+//
+data8 0xbbb2a0947b093a5d , 0x00003ffb // log(1/frcpa(1+51/2^-8))/2
+data8 0xbf4ec1505811684a , 0x00003ffb // log(1/frcpa(1+52/2^-8))/2
+data8 0xc2535bacfa8975ff , 0x00003ffb // log(1/frcpa(1+53/2^-8))/2
+data8 0xc55a3eafad187eb8 , 0x00003ffb // log(1/frcpa(1+54/2^-8))/2
+data8 0xc8ff2484b2c0da74 , 0x00003ffb // log(1/frcpa(1+55/2^-8))/2
+//
+data8 0xcc0b1a008d53ab76 , 0x00003ffb // log(1/frcpa(1+56/2^-8))/2
+data8 0xcfb6203844b3209b , 0x00003ffb // log(1/frcpa(1+57/2^-8))/2
+data8 0xd2c73949a47a19f5 , 0x00003ffb // log(1/frcpa(1+58/2^-8))/2
+data8 0xd5daae18b49d6695 , 0x00003ffb // log(1/frcpa(1+59/2^-8))/2
+data8 0xd8f08248cf7e8019 , 0x00003ffb // log(1/frcpa(1+60/2^-8))/2
+//
+data8 0xdca7749f1b3e540e , 0x00003ffb // log(1/frcpa(1+61/2^-8))/2
+data8 0xdfc28e033aaaf7c7 , 0x00003ffb // log(1/frcpa(1+62/2^-8))/2
+data8 0xe2e012a5f91d2f55 , 0x00003ffb // log(1/frcpa(1+63/2^-8))/2
+data8 0xe600064ed9e292a8 , 0x00003ffb // log(1/frcpa(1+64/2^-8))/2
+data8 0xe9226cce42b39f60 , 0x00003ffb // log(1/frcpa(1+65/2^-8))/2
+//
+data8 0xec4749fd97a28360 , 0x00003ffb // log(1/frcpa(1+66/2^-8))/2
+data8 0xef6ea1bf57780495 , 0x00003ffb // log(1/frcpa(1+67/2^-8))/2
+data8 0xf29877ff38809091 , 0x00003ffb // log(1/frcpa(1+68/2^-8))/2
+data8 0xf5c4d0b245cb89be , 0x00003ffb // log(1/frcpa(1+69/2^-8))/2
+data8 0xf8f3afd6fcdef3aa , 0x00003ffb // log(1/frcpa(1+70/2^-8))/2
+//
+data8 0xfc2519756be1abc7 , 0x00003ffb // log(1/frcpa(1+71/2^-8))/2
+data8 0xff59119f503e6832 , 0x00003ffb // log(1/frcpa(1+72/2^-8))/2
+data8 0x8147ce381ae0e146 , 0x00003ffc // log(1/frcpa(1+73/2^-8))/2
+data8 0x82e45f06cb1ad0f2 , 0x00003ffc // log(1/frcpa(1+74/2^-8))/2
+data8 0x842f5c7c573cbaa2 , 0x00003ffc // log(1/frcpa(1+75/2^-8))/2
+//
+data8 0x85ce471968c8893a , 0x00003ffc // log(1/frcpa(1+76/2^-8))/2
+data8 0x876e8305bc04066d , 0x00003ffc // log(1/frcpa(1+77/2^-8))/2
+data8 0x891012678031fbb3 , 0x00003ffc // log(1/frcpa(1+78/2^-8))/2
+data8 0x8a5f1493d766a05f , 0x00003ffc // log(1/frcpa(1+79/2^-8))/2
+data8 0x8c030c778c56fa00 , 0x00003ffc // log(1/frcpa(1+80/2^-8))/2
+//
+data8 0x8da85df17e31d9ae , 0x00003ffc // log(1/frcpa(1+81/2^-8))/2
+data8 0x8efa663e7921687e , 0x00003ffc // log(1/frcpa(1+82/2^-8))/2
+data8 0x90a22b6875c6a1f8 , 0x00003ffc // log(1/frcpa(1+83/2^-8))/2
+data8 0x91f62cc8f5d24837 , 0x00003ffc // log(1/frcpa(1+84/2^-8))/2
+data8 0x93a06cfc3857d980 , 0x00003ffc // log(1/frcpa(1+85/2^-8))/2
+//
+data8 0x94f66d5e6fd01ced , 0x00003ffc // log(1/frcpa(1+86/2^-8))/2
+data8 0x96a330156e6772f2 , 0x00003ffc // log(1/frcpa(1+87/2^-8))/2
+data8 0x97fb3582754ea25b , 0x00003ffc // log(1/frcpa(1+88/2^-8))/2
+data8 0x99aa8259aad1bbf2 , 0x00003ffc // log(1/frcpa(1+89/2^-8))/2
+data8 0x9b0492f6227ae4a8 , 0x00003ffc // log(1/frcpa(1+90/2^-8))/2
+//
+data8 0x9c5f8e199bf3a7a5 , 0x00003ffc // log(1/frcpa(1+91/2^-8))/2
+data8 0x9e1293b9998c1daa , 0x00003ffc // log(1/frcpa(1+92/2^-8))/2
+data8 0x9f6fa31e0b41f308 , 0x00003ffc // log(1/frcpa(1+93/2^-8))/2
+data8 0xa0cda11eaf46390e , 0x00003ffc // log(1/frcpa(1+94/2^-8))/2
+data8 0xa22c8f029cfa45aa , 0x00003ffc // log(1/frcpa(1+95/2^-8))/2
+//
+data8 0xa3e48badb7856b34 , 0x00003ffc // log(1/frcpa(1+96/2^-8))/2
+data8 0xa5459a0aa95849f9 , 0x00003ffc // log(1/frcpa(1+97/2^-8))/2
+data8 0xa6a79c84480cfebd , 0x00003ffc // log(1/frcpa(1+98/2^-8))/2
+data8 0xa80a946d0fcb3eb2 , 0x00003ffc // log(1/frcpa(1+99/2^-8))/2
+data8 0xa96e831a3ea7b314 , 0x00003ffc // log(1/frcpa(1+100/2^-8))/2
+//
+data8 0xaad369e3dc544e3b , 0x00003ffc // log(1/frcpa(1+101/2^-8))/2
+data8 0xac92e9588952c815 , 0x00003ffc // log(1/frcpa(1+102/2^-8))/2
+data8 0xadfa035aa1ed8fdc , 0x00003ffc // log(1/frcpa(1+103/2^-8))/2
+data8 0xaf6219eae1ad6e34 , 0x00003ffc // log(1/frcpa(1+104/2^-8))/2
+data8 0xb0cb2e6d8160f753 , 0x00003ffc // log(1/frcpa(1+105/2^-8))/2
+//
+data8 0xb2354249ad950f72 , 0x00003ffc // log(1/frcpa(1+106/2^-8))/2
+data8 0xb3a056e98ef4a3b4 , 0x00003ffc // log(1/frcpa(1+107/2^-8))/2
+data8 0xb50c6dba52c6292a , 0x00003ffc // log(1/frcpa(1+108/2^-8))/2
+data8 0xb679882c33876165 , 0x00003ffc // log(1/frcpa(1+109/2^-8))/2
+data8 0xb78c07429785cedc , 0x00003ffc // log(1/frcpa(1+110/2^-8))/2
+//
+data8 0xb8faeb8dc4a77d24 , 0x00003ffc // log(1/frcpa(1+111/2^-8))/2
+data8 0xba6ad77eb36ae0d6 , 0x00003ffc // log(1/frcpa(1+112/2^-8))/2
+data8 0xbbdbcc915e9bee50 , 0x00003ffc // log(1/frcpa(1+113/2^-8))/2
+data8 0xbd4dcc44f8cf12ef , 0x00003ffc // log(1/frcpa(1+114/2^-8))/2
+data8 0xbec0d81bf5b531fa , 0x00003ffc // log(1/frcpa(1+115/2^-8))/2
+//
+data8 0xc034f19c139186f4 , 0x00003ffc // log(1/frcpa(1+116/2^-8))/2
+data8 0xc14cb69f7c5e55ab , 0x00003ffc // log(1/frcpa(1+117/2^-8))/2
+data8 0xc2c2abbb6e5fd56f , 0x00003ffc // log(1/frcpa(1+118/2^-8))/2
+data8 0xc439b2c193e6771e , 0x00003ffc // log(1/frcpa(1+119/2^-8))/2
+data8 0xc553acb9d5c67733 , 0x00003ffc // log(1/frcpa(1+120/2^-8))/2
+//
+data8 0xc6cc96e441272441 , 0x00003ffc // log(1/frcpa(1+121/2^-8))/2
+data8 0xc8469753eca88c30 , 0x00003ffc // log(1/frcpa(1+122/2^-8))/2
+data8 0xc962cf3ce072b05c , 0x00003ffc // log(1/frcpa(1+123/2^-8))/2
+data8 0xcadeba8771f694aa , 0x00003ffc // log(1/frcpa(1+124/2^-8))/2
+data8 0xcc5bc08d1f72da94 , 0x00003ffc // log(1/frcpa(1+125/2^-8))/2
+//
+data8 0xcd7a3f99ea035c29 , 0x00003ffc // log(1/frcpa(1+126/2^-8))/2
+data8 0xcef93860c8a53c35 , 0x00003ffc // log(1/frcpa(1+127/2^-8))/2
+data8 0xd0192f68a7ed23df , 0x00003ffc // log(1/frcpa(1+128/2^-8))/2
+data8 0xd19a201127d3c645 , 0x00003ffc // log(1/frcpa(1+129/2^-8))/2
+data8 0xd2bb92f4061c172c , 0x00003ffc // log(1/frcpa(1+130/2^-8))/2
+//
+data8 0xd43e80b2ee8cc8fc , 0x00003ffc // log(1/frcpa(1+131/2^-8))/2
+data8 0xd56173601fc4ade4 , 0x00003ffc // log(1/frcpa(1+132/2^-8))/2
+data8 0xd6e6637efb54086f , 0x00003ffc // log(1/frcpa(1+133/2^-8))/2
+data8 0xd80ad9f58f3c8193 , 0x00003ffc // log(1/frcpa(1+134/2^-8))/2
+data8 0xd991d1d31aca41f8 , 0x00003ffc // log(1/frcpa(1+135/2^-8))/2
+//
+data8 0xdab7d02231484a93 , 0x00003ffc // log(1/frcpa(1+136/2^-8))/2
+data8 0xdc40d532cde49a54 , 0x00003ffc // log(1/frcpa(1+137/2^-8))/2
+data8 0xdd685f79ed8b265e , 0x00003ffc // log(1/frcpa(1+138/2^-8))/2
+data8 0xde9094bbc0e17b1d , 0x00003ffc // log(1/frcpa(1+139/2^-8))/2
+data8 0xe01c91b78440c425 , 0x00003ffc // log(1/frcpa(1+140/2^-8))/2
+//
+data8 0xe14658f26997e729 , 0x00003ffc // log(1/frcpa(1+141/2^-8))/2
+data8 0xe270cdc2391e0d23 , 0x00003ffc // log(1/frcpa(1+142/2^-8))/2
+data8 0xe3ffce3a2aa64922 , 0x00003ffc // log(1/frcpa(1+143/2^-8))/2
+data8 0xe52bdb274ed82887 , 0x00003ffc // log(1/frcpa(1+144/2^-8))/2
+data8 0xe6589852e75d7df6 , 0x00003ffc // log(1/frcpa(1+145/2^-8))/2
+//
+data8 0xe786068c79937a7d , 0x00003ffc // log(1/frcpa(1+146/2^-8))/2
+data8 0xe91903adad100911 , 0x00003ffc // log(1/frcpa(1+147/2^-8))/2
+data8 0xea481236f7d35bb0 , 0x00003ffc // log(1/frcpa(1+148/2^-8))/2
+data8 0xeb77d48c692e6b14 , 0x00003ffc // log(1/frcpa(1+149/2^-8))/2
+data8 0xeca84b83d7297b87 , 0x00003ffc // log(1/frcpa(1+150/2^-8))/2
+//
+data8 0xedd977f4962aa158 , 0x00003ffc // log(1/frcpa(1+151/2^-8))/2
+data8 0xef7179a22f257754 , 0x00003ffc // log(1/frcpa(1+152/2^-8))/2
+data8 0xf0a450d139366ca7 , 0x00003ffc // log(1/frcpa(1+153/2^-8))/2
+data8 0xf1d7e0524ff9ffdb , 0x00003ffc // log(1/frcpa(1+154/2^-8))/2
+data8 0xf30c29036a8b6cae , 0x00003ffc // log(1/frcpa(1+155/2^-8))/2
+//
+data8 0xf4412bc411ea8d92 , 0x00003ffc // log(1/frcpa(1+156/2^-8))/2
+data8 0xf576e97564c8619d , 0x00003ffc // log(1/frcpa(1+157/2^-8))/2
+data8 0xf6ad62fa1b5f172f , 0x00003ffc // log(1/frcpa(1+158/2^-8))/2
+data8 0xf7e499368b55c542 , 0x00003ffc // log(1/frcpa(1+159/2^-8))/2
+data8 0xf91c8d10abaffe22 , 0x00003ffc // log(1/frcpa(1+160/2^-8))/2
+//
+data8 0xfa553f7018c966f3 , 0x00003ffc // log(1/frcpa(1+161/2^-8))/2
+data8 0xfb8eb13e185d802c , 0x00003ffc // log(1/frcpa(1+162/2^-8))/2
+data8 0xfcc8e3659d9bcbed , 0x00003ffc // log(1/frcpa(1+163/2^-8))/2
+data8 0xfe03d6d34d487fd2 , 0x00003ffc // log(1/frcpa(1+164/2^-8))/2
+data8 0xff3f8c7581e9f0ae , 0x00003ffc // log(1/frcpa(1+165/2^-8))/2
+//
+data8 0x803e029e280173ae , 0x00003ffd // log(1/frcpa(1+166/2^-8))/2
+data8 0x80dca10cc52d0757 , 0x00003ffd // log(1/frcpa(1+167/2^-8))/2
+data8 0x817ba200632755a1 , 0x00003ffd // log(1/frcpa(1+168/2^-8))/2
+data8 0x821b05f3b01d6774 , 0x00003ffd // log(1/frcpa(1+169/2^-8))/2
+data8 0x82bacd623ff19d06 , 0x00003ffd // log(1/frcpa(1+170/2^-8))/2
+//
+data8 0x835af8c88e7a8f47 , 0x00003ffd // log(1/frcpa(1+171/2^-8))/2
+data8 0x83c5f8299e2b4091 , 0x00003ffd // log(1/frcpa(1+172/2^-8))/2
+data8 0x8466cb43f3d87300 , 0x00003ffd // log(1/frcpa(1+173/2^-8))/2
+data8 0x850803a67c80ca4b , 0x00003ffd // log(1/frcpa(1+174/2^-8))/2
+data8 0x85a9a1d11a23b461 , 0x00003ffd // log(1/frcpa(1+175/2^-8))/2
+//
+data8 0x864ba644a18e6e05 , 0x00003ffd // log(1/frcpa(1+176/2^-8))/2
+data8 0x86ee1182dcc432f7 , 0x00003ffd // log(1/frcpa(1+177/2^-8))/2
+data8 0x875a925d7e48c316 , 0x00003ffd // log(1/frcpa(1+178/2^-8))/2
+data8 0x87fdaa109d23aef7 , 0x00003ffd // log(1/frcpa(1+179/2^-8))/2
+data8 0x88a129ed4becfaf2 , 0x00003ffd // log(1/frcpa(1+180/2^-8))/2
+//
+data8 0x89451278ecd7f9cf , 0x00003ffd // log(1/frcpa(1+181/2^-8))/2
+data8 0x89b29295f8432617 , 0x00003ffd // log(1/frcpa(1+182/2^-8))/2
+data8 0x8a572ac5a5496882 , 0x00003ffd // log(1/frcpa(1+183/2^-8))/2
+data8 0x8afc2d0ce3b2dadf , 0x00003ffd // log(1/frcpa(1+184/2^-8))/2
+data8 0x8b6a69c608cfd3af , 0x00003ffd // log(1/frcpa(1+185/2^-8))/2
+//
+data8 0x8c101e106e899a83 , 0x00003ffd // log(1/frcpa(1+186/2^-8))/2
+data8 0x8cb63de258f9d626 , 0x00003ffd // log(1/frcpa(1+187/2^-8))/2
+data8 0x8d2539c5bd19e2b1 , 0x00003ffd // log(1/frcpa(1+188/2^-8))/2
+data8 0x8dcc0e064b29e6f1 , 0x00003ffd // log(1/frcpa(1+189/2^-8))/2
+data8 0x8e734f45d88357ae , 0x00003ffd // log(1/frcpa(1+190/2^-8))/2
+//
+data8 0x8ee30cef034a20db , 0x00003ffd // log(1/frcpa(1+191/2^-8))/2
+data8 0x8f8b0515686d1d06 , 0x00003ffd // log(1/frcpa(1+192/2^-8))/2
+data8 0x90336bba039bf32f , 0x00003ffd // log(1/frcpa(1+193/2^-8))/2
+data8 0x90a3edd23d1c9d58 , 0x00003ffd // log(1/frcpa(1+194/2^-8))/2
+data8 0x914d0de2f5d61b32 , 0x00003ffd // log(1/frcpa(1+195/2^-8))/2
+//
+data8 0x91be0c20d28173b5 , 0x00003ffd // log(1/frcpa(1+196/2^-8))/2
+data8 0x9267e737c06cd34a , 0x00003ffd // log(1/frcpa(1+197/2^-8))/2
+data8 0x92d962ae6abb1237 , 0x00003ffd // log(1/frcpa(1+198/2^-8))/2
+data8 0x9383fa6afbe2074c , 0x00003ffd // log(1/frcpa(1+199/2^-8))/2
+data8 0x942f0421651c1c4e , 0x00003ffd // log(1/frcpa(1+200/2^-8))/2
+//
+data8 0x94a14a3845bb985e , 0x00003ffd // log(1/frcpa(1+201/2^-8))/2
+data8 0x954d133857f861e7 , 0x00003ffd // log(1/frcpa(1+202/2^-8))/2
+data8 0x95bfd96468e604c4 , 0x00003ffd // log(1/frcpa(1+203/2^-8))/2
+data8 0x9632d31cafafa858 , 0x00003ffd // log(1/frcpa(1+204/2^-8))/2
+data8 0x96dfaabd86fa1647 , 0x00003ffd // log(1/frcpa(1+205/2^-8))/2
+//
+data8 0x9753261fcbb2a594 , 0x00003ffd // log(1/frcpa(1+206/2^-8))/2
+data8 0x9800c11b426b996d , 0x00003ffd // log(1/frcpa(1+207/2^-8))/2
+data8 0x9874bf4d45ae663c , 0x00003ffd // log(1/frcpa(1+208/2^-8))/2
+data8 0x99231f5ee9a74f79 , 0x00003ffd // log(1/frcpa(1+209/2^-8))/2
+data8 0x9997a18a56bcad28 , 0x00003ffd // log(1/frcpa(1+210/2^-8))/2
+//
+data8 0x9a46c873a3267e79 , 0x00003ffd // log(1/frcpa(1+211/2^-8))/2
+data8 0x9abbcfc621eb6cb6 , 0x00003ffd // log(1/frcpa(1+212/2^-8))/2
+data8 0x9b310cb0d354c990 , 0x00003ffd // log(1/frcpa(1+213/2^-8))/2
+data8 0x9be14cf9e1b3515c , 0x00003ffd // log(1/frcpa(1+214/2^-8))/2
+data8 0x9c5710b8cbb73a43 , 0x00003ffd // log(1/frcpa(1+215/2^-8))/2
+//
+data8 0x9ccd0abd301f399c , 0x00003ffd // log(1/frcpa(1+216/2^-8))/2
+data8 0x9d7e67f3bdce8888 , 0x00003ffd // log(1/frcpa(1+217/2^-8))/2
+data8 0x9df4ea81a99daa01 , 0x00003ffd // log(1/frcpa(1+218/2^-8))/2
+data8 0x9e6ba405a54514ba , 0x00003ffd // log(1/frcpa(1+219/2^-8))/2
+data8 0x9f1e21c8c7bb62b3 , 0x00003ffd // log(1/frcpa(1+220/2^-8))/2
+//
+data8 0x9f956593f6b6355c , 0x00003ffd // log(1/frcpa(1+221/2^-8))/2
+data8 0xa00ce1092e5498c3 , 0x00003ffd // log(1/frcpa(1+222/2^-8))/2
+data8 0xa0c08309c4b912c1 , 0x00003ffd // log(1/frcpa(1+223/2^-8))/2
+data8 0xa1388a8c6faa2afa , 0x00003ffd // log(1/frcpa(1+224/2^-8))/2
+data8 0xa1b0ca7095b5f985 , 0x00003ffd // log(1/frcpa(1+225/2^-8))/2
+//
+data8 0xa22942eb47534a00 , 0x00003ffd // log(1/frcpa(1+226/2^-8))/2
+data8 0xa2de62326449d0a3 , 0x00003ffd // log(1/frcpa(1+227/2^-8))/2
+data8 0xa357690f88bfe345 , 0x00003ffd // log(1/frcpa(1+228/2^-8))/2
+data8 0xa3d0a93f45169a4b , 0x00003ffd // log(1/frcpa(1+229/2^-8))/2
+data8 0xa44a22f7ffe65f30 , 0x00003ffd // log(1/frcpa(1+230/2^-8))/2
+//
+data8 0xa500c5e5b4c1aa36 , 0x00003ffd // log(1/frcpa(1+231/2^-8))/2
+data8 0xa57ad064eb2ebbc2 , 0x00003ffd // log(1/frcpa(1+232/2^-8))/2
+data8 0xa5f5152dedf4384e , 0x00003ffd // log(1/frcpa(1+233/2^-8))/2
+data8 0xa66f9478856233ec , 0x00003ffd // log(1/frcpa(1+234/2^-8))/2
+data8 0xa6ea4e7cca02c32e , 0x00003ffd // log(1/frcpa(1+235/2^-8))/2
+//
+data8 0xa765437325341ccf , 0x00003ffd // log(1/frcpa(1+236/2^-8))/2
+data8 0xa81e21e6c75b4020 , 0x00003ffd // log(1/frcpa(1+237/2^-8))/2
+data8 0xa899ab333fe2b9ca , 0x00003ffd // log(1/frcpa(1+238/2^-8))/2
+data8 0xa9157039c51ebe71 , 0x00003ffd // log(1/frcpa(1+239/2^-8))/2
+data8 0xa991713433c2b999 , 0x00003ffd // log(1/frcpa(1+240/2^-8))/2
+//
+data8 0xaa0dae5cbcc048b3 , 0x00003ffd // log(1/frcpa(1+241/2^-8))/2
+data8 0xaa8a27ede5eb13ad , 0x00003ffd // log(1/frcpa(1+242/2^-8))/2
+data8 0xab06de228a9e3499 , 0x00003ffd // log(1/frcpa(1+243/2^-8))/2
+data8 0xab83d135dc633301 , 0x00003ffd // log(1/frcpa(1+244/2^-8))/2
+data8 0xac3fb076adc7fe7a , 0x00003ffd // log(1/frcpa(1+245/2^-8))/2
+//
+data8 0xacbd3cbbe47988f1 , 0x00003ffd // log(1/frcpa(1+246/2^-8))/2
+data8 0xad3b06b1a5dc57c3 , 0x00003ffd // log(1/frcpa(1+247/2^-8))/2
+data8 0xadb90e94af887717 , 0x00003ffd // log(1/frcpa(1+248/2^-8))/2
+data8 0xae3754a218f7c816 , 0x00003ffd // log(1/frcpa(1+249/2^-8))/2
+data8 0xaeb5d9175437afa2 , 0x00003ffd // log(1/frcpa(1+250/2^-8))/2
+//
+data8 0xaf349c322e9c7cee , 0x00003ffd // log(1/frcpa(1+251/2^-8))/2
+data8 0xafb39e30d1768d1c , 0x00003ffd // log(1/frcpa(1+252/2^-8))/2
+data8 0xb032df51c2c93116 , 0x00003ffd // log(1/frcpa(1+253/2^-8))/2
+data8 0xb0b25fd3e6035ad9 , 0x00003ffd // log(1/frcpa(1+254/2^-8))/2
+data8 0xb1321ff67cba178c , 0x00003ffd // log(1/frcpa(1+255/2^-8))/2
+LOCAL_OBJECT_END(atanh_data_3)
+
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(atanh)
+// double atanh(double x): x arrives in f8, the result is returned in f8
+{ .mfi
+ getf.exp rArgSExpb = f8 // Must recompute if x unorm
+ fclass.m p13,p0 = f8, 0x0b // is arg denormal ?
+ mov rExpbMask = 0x1ffff // mask selecting the 17-bit biased exponent
+}
+{ .mfi
+ addl DataPtr = @ltoff(atanh_data), gp // linkage-table slot of atanh_data
+ fnma.s1 fOneMx = f8, f1, f1 // fOneMx = 1 - x
+ mov rBias = 0xffff // biased exponent of 1.0
+}
+;;
+
+{ .mfi
+ mov rNearZeroBound = 0xfffd // biased exp of 1/4
+ fclass.m p12,p0 = f8, 0xc7 // is arg NaN or +/-0 ?
+ nop.i 0
+}
+{ .mfi
+ ld8 DataPtr = [DataPtr] // DataPtr = address of atanh_data
+ fma.s1 fOnePx = f8, f1, f1 // fOnePx = 1 + x
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.lt.s1 p10,p11 = f8,f0 // is x < 0 ?
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fnorm.s1 fNormX = f8 // Normalize x
+(p13) br.cond.spnt ATANH_UNORM // Branch if x=unorm
+}
+;;
+
+ATANH_COMMON:
+// Return here if x=unorm and not denorm
+{ .mfi
+ adds Data2Ptr = 0x50, DataPtr // C9..C3 are read through Data2Ptr
+ fma.s1 fX2 = f8, f8, f0 // x^2
+ nop.i 0
+}
+{ .mfb
+ adds Data3Ptr = 0xC0, DataPtr // C2..C0 are read through Data3Ptr
+(p12) fma.d.s0 f8 = f8,f1,f8 // NaN or +/-0
+(p12) br.ret.spnt b0 // Exit for x Nan or zero
+}
+;;
+
+{ .mfi
+ ldfe fC9 = [Data2Ptr], 16
+(p11) frcpa.s1 fRcp0, p0 = f1, fOneMx // x >= 0: r0 ~ 1/(1 - x)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe fC8 = [Data2Ptr], 16
+(p10) frcpa.s1 fRcp0n, p0 = f1, fOnePx // x < 0: r0 ~ 1/(1 + x)
+ and rArgExpb = rArgSExpb, rExpbMask // biased exponent
+}
+{ .mfi
+ nop.m 0
+(p10) fma.s1 fOneMx = fOnePx, f1, f0 // fOneMx = 1 - |x|
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe fC7 = [Data2Ptr], 16
+(p10) fnma.s1 fOnePx = fNormX, f1, f1 // fOnePx = 1 + |x|
+ cmp.ge p6,p0 = rArgExpb, rBias // is Expb(Arg) >= Expb(1) ?
+}
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt atanh_ge_one // Branch if |x| >=1.0
+}
+;;
+
+{ .mfi
+ ldfe fC6 = [Data2Ptr], 16
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe fC5 = [Data2Ptr], 16
+ fma.s1 fX4 = fX2, fX2, f0 // x^4
+ cmp.gt p8,p0 = rNearZeroBound, rArgExpb
+}
+{ .mfb
+ ldfe fC2 = [Data3Ptr], 16
+ fma.s1 fX3 = fX2, fNormX, f0 // x^3
+(p8) br.cond.spnt atanh_near_zero // Branch if 0 < |x| < 0.25
+}
+;;
+
+// Main path: 0.25 <= |x| < 1.0
+// NR method: iteration #1
+.pred.rel "mutex",p11,p10
+{ .mfi
+ ldfpd fP5, fP4 = [DataPtr], 16
+(p11) fnma.s1 fRcp1 = fRcp0, fOneMx, f1 // t = 1 - r0*x
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p10) fnma.s1 fRcp1 = fRcp0n, fOneMx, f1 // t = 1 - r0*x
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfpd fP3, fP2 = [DataPtr], 16
+ // r1 = r0 + r0*t = r0 + r0*(1 - r0*x)
+(p11) fma.s1 fRcp1 = fRcp0, fRcp1, fRcp0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // r1 = r0 + r0*t = r0 + r0*(1 - r0*x)
+(p10) fma.s1 fRcp1 = fRcp0n, fRcp1, fRcp0n
+ nop.i 0
+}
+;;
+
+// NR method: iteration #2
+{ .mfi
+ ldfd fP1 = [DataPtr], 16
+ fnma.s1 fRcp2 = fRcp1, fOneMx, f1 // t = 1 - r1*x
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe fLog2 = [DataPtr], 16
+ // r2 = r1 + r1*t = r1 + r1*(1 - r1*x)
+ fma.s1 fRcp2 = fRcp1, fRcp2, fRcp1
+ nop.i 0
+}
+;;
+
+// NR method: iteration #3
+{ .mfi
+ adds RcpTablePtr = 0xB0, DataPtr // DataPtr was post-incremented 4x16 by the loads above
+ fnma.s1 fRcp3 = fRcp2, fOneMx, f1 // t = 1 - r2*x
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fY4Rcp = fRcp2, fOnePx, f0 // fY4Rcp = r2*(1 + x)
+ nop.i 0
+}
+;;
+
+// polynomial approximation & final reconstruction
+{ .mfi
+ nop.m 0
+ frcpa.s1 fRcp, p0 = f1, fY4Rcp
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // y = r2 * (1 + x) + r2 * (1 + x) * t = (1 + x) * (r2 + r2*(1 - r2*x))
+ fma.s1 fY = fY4Rcp, fRcp3, fY4Rcp
+ nop.i 0
+}
+;;
+
+{ .mmi
+ getf.exp rSExpb = fY4Rcp // biased exponent and sign
+;;
+ getf.sig rSig = fY4Rcp // significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fms.s1 fR = fY, fRcp, f1 // fR = fY * fRcp - 1
+ nop.i 0
+}
+;;
+
+{ .mmi
+ and rExpb = rSExpb, rExpbMask
+;;
+ sub rN = rExpb, rBias // exponent
+ extr.u rInd = rSig,55,8 // Extract 8 bits (bits 55..62) as table index
+}
+;;
+
+{ .mmi
+ setf.sig fN4Cvt = rN // integer N, converted by fcvt.xf below
+ shladd RcpTablePtr = rInd, 4, RcpTablePtr // table entries are 16 bytes apart
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe fLogT = [RcpTablePtr]
+ fma.s1 fR2 = fR, fR, f0 // r^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP54 = fP5, fR, fP4 // P5*r + P4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP32 = fP3, fR, fP2 // P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fR3 = fR2, fR, f0 // r^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP10 = fP1, fR2, fR // P1*r^2 + r
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf fN = fN4Cvt // N as a floating-point value
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP54 = fP54, fR2, fP32 // (P5*r + P4)*r^2 + P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fLogT_N = fN, fLog2, fLogT // N*Log2 + LogT
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // ((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r
+ fma.s1 fP54 = fP54, fR3, fP10
+ nop.i 0
+}
+;;
+
+.pred.rel "mutex",p11,p10
+{ .mfi
+ nop.m 0
+ // 0.5*(((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r) + 0.5*(N*Log2 + T)
+(p11) fnma.d.s0 f8 = fP54, fP1, fLogT_N
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // -0.5*(((P5*r + P4)*r^2 + P3*r + P2)*r^3 + P1*r^2 + r) - 0.5*(N*Log2 + T)
+(p10) fms.d.s0 f8 = fP54, fP1, fLogT_N
+ br.ret.sptk b0 // Exit for 0.25 <= |x| < 1.0
+}
+;;
+
+// Here if 0 < |x| < 0.25
+atanh_near_zero:
+{ .mfi
+ ldfe fC4 = [Data2Ptr], 16
+ fma.s1 fP98 = fC9, fX2, fC8 // C9*x^2 + C8
+ nop.i 0
+}
+{ .mfi
+ ldfe fC1 = [Data3Ptr], 16
+ fma.s1 fP76 = fC7, fX2, fC6 // C7*x^2 + C6
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe fC3 = [Data2Ptr], 16
+ fma.s1 fX8 = fX4, fX4, f0 // x^8
+ nop.i 0
+}
+{ .mfi
+ ldfe fC0 = [Data3Ptr], 16
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP98 = fP98, fX4, fP76 // C9*x^6 + C8*x^4 + C7*x^2 + C6
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP54 = fC5, fX2, fC4 // C5*x^2 + C4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP32 = fC3, fX2, fC2 // C3*x^2 + C2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP10 = fC1, fX2, fC0 // C1*x^2 + C0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP54 = fP54, fX4, fP32 // C5*x^6 + C4*x^4 + C3*x^2 + C2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // C9*x^14 + C8*x^12 + C7*x^10 + C6*x^8 + C5*x^6 + C4*x^4 + C3*x^2 + C2
+ fma.s1 fP98 = fP98, fX8, fP54
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // C9*x^18 + C8*x^16 + C7*x^14 + C6*x^12 + C5*x^10 + C4*x^8 + C3*x^6 +
+ // C2*x^4 + C1*x^2 + C0
+ fma.s1 fP98 = fP98, fX4, fP10
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // C9*x^21 + C8*x^19 + C7*x^17 + C6*x^15 + C5*x^13 + C4*x^11 + C3*x^9 +
+ // C2*x^7 + C1*x^5 + C0*x^3 + x
+ fma.d.s0 f8 = fP98, fX3, fNormX
+ br.ret.sptk b0 // Exit for 0 < |x| < 0.25
+}
+;;
+
+ATANH_UNORM:
+// Here if x=unorm
+{ .mfi
+ getf.exp rArgSExpb = fNormX // Recompute if x unorm
+ fclass.m p0,p13 = fNormX, 0x0b // Test x denorm
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy to set denormal flag
+(p13) br.cond.sptk ATANH_COMMON // Continue if x unorm and not denorm
+}
+;;
+
+.pred.rel "mutex",p10,p11
+{ .mfi
+ nop.m 0
+(p10) fnma.d.s0 f8 = f8,f8,f8 // Result x-x^2 if x=-denorm
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p11) fma.d.s0 f8 = f8,f8,f8 // Result x+x^2 if x=+denorm
+ br.ret.spnt b0 // Exit if denorm
+}
+;;
+
+// Here if |x| >= 1.0
+atanh_ge_one:
+{ .mfi
+ alloc r32 = ar.pfs,1,3,4,0 // 1 input, 3 locals, 4 outputs, no rotating regs
+ fmerge.s fAbsX = f0, f8 // Form |x|
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.s f10 = f8, f8 // Save input for error call
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s1 p6,p7 = fAbsX, f1 // Test for |x| = 1.0
+ nop.i 0
+}
+;;
+
+// Set error tag and result, and raise invalid flag if |x| > 1.0
+{ .mfi
+(p7) mov atanh_GR_tag = 131
+(p7) frcpa.s0 f8, p0 = f0, f0 // Get QNaN, and raise invalid
+ nop.i 0
+}
+;;
+
+// Set error tag and result, and raise Z flag if |x| = 1.0
+{ .mfi
+ nop.m 0
+(p6) frcpa.s0 fRcp, p0 = f1, f0 // Get inf, and raise Z flag
+ nop.i 0
+}
+;;
+
+{ .mfb
+(p6) mov atanh_GR_tag = 132
+(p6) fmerge.s f8 = f8, fRcp // result is +-inf
+ br.cond.sptk __libm_error_region // Exit if |x| >= 1.0
+}
+;;
+
+GLOBAL_LIBM_END(atanh)
+libm_alias_double_other (atanh, atanh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// Spills f10 (input) and f8 (result) to a new 64-byte frame, calls __libm_error_support, reloads f8.
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp-32 == new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfd [GR_Parameter_Y] = f1,16 // STORE Parameter 2 (f1) on stack, then advance to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfd [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Result slot address (sp+48), recomputed after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atanhf.S b/sysdeps/ia64/fpu/e_atanhf.S
new file mode 100644
index 0000000000..f1ae43305a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atanhf.S
@@ -0,0 +1,845 @@
+.file "atanhf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 05/22/01 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/06/02 Improved Itanium 2 performance
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 05/26/03 Improved performance, fixed to handle unorms
+//
+// API
+//==============================================================
+// float atanhf(float)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+//
+// There are 7 paths:
+// 1. x = +/-0.0
+// Return atanhf(x) = +/-0.0
+//
+// 2. 0.0 < |x| <= MAX_DENORMAL_ABS
+// Return atanhf(x) = x + sign(x)*x^2
+//
+// 3. MAX_DENORMAL_ABS < |x| < 2^(-20)
+// Return atanhf(x) = Pol3(x), where Pol3(x) = x + x^3
+//
+// 4. 2^(-20) <= |x| < 1
+// Return atanhf(x) = 0.5 * (log(1 + x) - log(1 - x))
+// Algorithm description for log function see below.
+//
+// 5. |x| = 1
+// Return atanhf(x) = sign(x) * +INF
+//
+// 6. 1 < |x| <= +INF
+// Return atanhf(x) = QNaN
+//
+// 7. x = [S,Q]NaN
+// Return atanhf(x) = QNaN
+//
+//==============================================================
+// Algorithm Description for log(x) function
+//
+// Consider x = 2^N * 1.f1 f2 f3 f4...f63
+// log(x) = log(x * frcpa(x) / frcpa(x))
+// = log(x * frcpa(x)) + log(1/frcpa(x))
+// = log(x * frcpa(x)) - log(frcpa(x))
+//
+// frcpa(x) = 2^(-N) * frcpa(1.f1 f2 ... f63)
+//
+// -log(frcpa(x)) = -log(C)
+// = -log(2^(-N)) - log(frcpa(1.f1 f2 ... f63))
+//
+// -log(frcpa(x)) = -log(C)
+// = N*log2 - log(frcpa(1.f1 f2 ... f63))
+//
+//
+// log(x) = log(1/frcpa(x)) + log(frcpa(x) x)
+//
+// log(x) = N*log2 + log(1./frcpa(1.f1 f2 ... f63)) + log(x * frcpa(x))
+// log(x) = N*log2 + T + log(frcpa(x) x)
+//
+// Log(x) = N*log2 + T + log(C * x)
+//
+// C * x = 1 + r
+//
+// log(x) = N*log2 + T + log(1 + r)
+// log(x) = N*log2 + T + Series(r)
+//
+// 1.f1 f2 ... f8 has 256 entries.
+// They are 1 + k/2^8, k = 0 ... 255
+// These 256 values are the table entries.
+//
+// Implementation
+//==============================================================
+// C = frcpa(x)
+// r = C * x - 1
+//
+// Form rseries = r + P1*r^2 + P2*r^3 + P3*r^4
+//
+// x = f * 2^N where f is 1.f_1f_2f_3...f_63
+// Nfloat = float(n) where n is the true unbiased exponent
+// pre-index = f_1f_2....f_8
+// index = pre_index * 16
+// get the dxt table entry at index + offset = T
+//
+// result = (T + Nfloat * log(2)) + rseries
+//
+// The T table is calculated as follows
+// Form x_k = 1 + k/2^8 where k goes from 0... 255
+// y_k = frcpa(x_k)
+// log(1/y_k) in quad and round to double-extended
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f32 -> f59
+
+// General registers used:
+// r14 -> r29, r32 -> r39
+
+// Predicate registers used:
+// p6 -> p9
+
+// p6 to filter out case when |x| >= 1
+// p7 to filter out case when x = [Q,S]NaN or +/-0
+// p8 to filter out case when |x| < 2^(-20)
+// p9 to filter out case when x = denormal
+
+
+// Assembly macros
+//==============================================================
+DataPtr = r14
+RcpTablePtrM = r15 // "M"/"m" suffix: presumably the log(1 - x) side - confirm against the code
+RcpTablePtrP = r16 // "P"/"p" suffix: presumably the log(1 + x) side - confirm against the code
+rExpbMask = r17
+rBias = r18
+rNearZeroBound = r19
+rArgSExpb = r20
+rArgExpb = r21
+rExpbm = r22
+rExpbp = r23
+rSigm = r24
+rSigp = r25
+rNm = r26
+rNp = r27
+rIndm = r28
+rIndp = r29
+
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_SAVE_PFS = r35
+
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+atanh_GR_tag = r39
+
+//==============================================================
+fOneMx = f33
+fOnePx = f34
+fRm2 = f35
+fRm3 = f36
+fRp2 = f37
+fRp3 = f38
+fRcpM = f39
+fRcpP = f40
+fRp = f41
+fRm = f42
+fN4CvtM = f43
+fN4CvtP = f44
+fNm = f45
+fNp = f46
+fLogTm = f47
+fLogTp = f48
+fLog2 = f49
+fArgAbs = f50 // NOTE(review): f50 is also fNormX - confirm the two are never live at once
+fNormX = f50 // same register as fArgAbs
+fP32m = f51
+fP32p = f52
+fP10m = f53
+fP10p = f54
+fX2 = f55
+fP3 = f56
+fP2 = f57
+fP1 = f58
+fHalf = f59
+
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(atanhf_data)
+data8 0xbfc0001008f39d59 // P3*0.5 (IEEE double, about -1/8)
+data8 0x3fc5556073e0c45a // P2*0.5 (IEEE double, about +1/6)
+data8 0xbfcffffffffaea15 // P1*0.5 (IEEE double, about -1/4)
+data8 0x3fe0000000000000 // 0.5
+data8 0x3fd62e42fefa39ef // 0.5*ln(2)
+data8 0x0000000000000000 // pad (brings the object to 48 bytes, a multiple of 16)
+LOCAL_OBJECT_END(atanhf_data)
+
+LOCAL_OBJECT_START(atanhf_data2)
+data8 0x3f50040155d5889e //log(1/frcpa(1+0/256))/2
+data8 0x3f68121214586b54 //log(1/frcpa(1+1/256))/2
+data8 0x3f741929f96832f0 //log(1/frcpa(1+2/256))/2
+data8 0x3f7c317384c75f06 //log(1/frcpa(1+3/256))/2
+data8 0x3f81a6b91ac73386 //log(1/frcpa(1+4/256))/2
+data8 0x3f85ba9a5d9ac039 //log(1/frcpa(1+5/256))/2
+data8 0x3f89d2a8074325f4 //log(1/frcpa(1+6/256))/2
+data8 0x3f8d6b2725979802 //log(1/frcpa(1+7/256))/2
+data8 0x3f90c58fa19dfaaa //log(1/frcpa(1+8/256))/2
+data8 0x3f92954c78cbce1b //log(1/frcpa(1+9/256))/2
+data8 0x3f94a94d2da96c56 //log(1/frcpa(1+10/256))/2
+data8 0x3f967c94f2d4bb58 //log(1/frcpa(1+11/256))/2
+data8 0x3f985188b630f068 //log(1/frcpa(1+12/256))/2
+data8 0x3f9a6b8abe73af4c //log(1/frcpa(1+13/256))/2
+data8 0x3f9c441e06f72a9e //log(1/frcpa(1+14/256))/2
+data8 0x3f9e1e6713606d07 //log(1/frcpa(1+15/256))/2
+data8 0x3f9ffa6911ab9301 //log(1/frcpa(1+16/256))/2
+data8 0x3fa0ec139c5da601 //log(1/frcpa(1+17/256))/2
+data8 0x3fa1dbd2643d190b //log(1/frcpa(1+18/256))/2
+data8 0x3fa2cc7284fe5f1c //log(1/frcpa(1+19/256))/2
+data8 0x3fa3bdf5a7d1ee64 //log(1/frcpa(1+20/256))/2
+data8 0x3fa4b05d7aa012e0 //log(1/frcpa(1+21/256))/2
+data8 0x3fa580db7ceb5702 //log(1/frcpa(1+22/256))/2
+data8 0x3fa674f089365a7a //log(1/frcpa(1+23/256))/2
+data8 0x3fa769ef2c6b568d //log(1/frcpa(1+24/256))/2
+data8 0x3fa85fd927506a48 //log(1/frcpa(1+25/256))/2
+data8 0x3fa9335e5d594989 //log(1/frcpa(1+26/256))/2
+data8 0x3faa2b0220c8e5f5 //log(1/frcpa(1+27/256))/2
+data8 0x3fab0004ac1a86ac //log(1/frcpa(1+28/256))/2
+data8 0x3fabf968769fca11 //log(1/frcpa(1+29/256))/2
+data8 0x3faccfedbfee13a8 //log(1/frcpa(1+30/256))/2
+data8 0x3fada727638446a2 //log(1/frcpa(1+31/256))/2
+data8 0x3faea3257fe10f7a //log(1/frcpa(1+32/256))/2
+data8 0x3faf7be9fedbfde6 //log(1/frcpa(1+33/256))/2
+data8 0x3fb02ab352ff25f4 //log(1/frcpa(1+34/256))/2
+data8 0x3fb097ce579d204d //log(1/frcpa(1+35/256))/2
+data8 0x3fb1178e8227e47c //log(1/frcpa(1+36/256))/2
+data8 0x3fb185747dbecf34 //log(1/frcpa(1+37/256))/2
+data8 0x3fb1f3b925f25d41 //log(1/frcpa(1+38/256))/2
+data8 0x3fb2625d1e6ddf57 //log(1/frcpa(1+39/256))/2
+data8 0x3fb2d1610c86813a //log(1/frcpa(1+40/256))/2
+data8 0x3fb340c59741142e //log(1/frcpa(1+41/256))/2
+data8 0x3fb3b08b6757f2a9 //log(1/frcpa(1+42/256))/2
+data8 0x3fb40dfb08378003 //log(1/frcpa(1+43/256))/2
+data8 0x3fb47e74e8ca5f7c //log(1/frcpa(1+44/256))/2
+data8 0x3fb4ef51f6466de4 //log(1/frcpa(1+45/256))/2
+data8 0x3fb56092e02ba516 //log(1/frcpa(1+46/256))/2
+data8 0x3fb5d23857cd74d5 //log(1/frcpa(1+47/256))/2
+data8 0x3fb6313a37335d76 //log(1/frcpa(1+48/256))/2
+data8 0x3fb6a399dabbd383 //log(1/frcpa(1+49/256))/2
+data8 0x3fb70337dd3ce41b //log(1/frcpa(1+50/256))/2
+data8 0x3fb77654128f6127 //log(1/frcpa(1+51/256))/2
+data8 0x3fb7e9d82a0b022d //log(1/frcpa(1+52/256))/2
+data8 0x3fb84a6b759f512f //log(1/frcpa(1+53/256))/2
+data8 0x3fb8ab47d5f5a310 //log(1/frcpa(1+54/256))/2
+data8 0x3fb91fe49096581b //log(1/frcpa(1+55/256))/2
+data8 0x3fb981634011aa75 //log(1/frcpa(1+56/256))/2
+data8 0x3fb9f6c407089664 //log(1/frcpa(1+57/256))/2
+data8 0x3fba58e729348f43 //log(1/frcpa(1+58/256))/2
+data8 0x3fbabb55c31693ad //log(1/frcpa(1+59/256))/2
+data8 0x3fbb1e104919efd0 //log(1/frcpa(1+60/256))/2
+data8 0x3fbb94ee93e367cb //log(1/frcpa(1+61/256))/2
+data8 0x3fbbf851c067555f //log(1/frcpa(1+62/256))/2
+data8 0x3fbc5c0254bf23a6 //log(1/frcpa(1+63/256))/2
+data8 0x3fbcc000c9db3c52 //log(1/frcpa(1+64/256))/2
+data8 0x3fbd244d99c85674 //log(1/frcpa(1+65/256))/2
+data8 0x3fbd88e93fb2f450 //log(1/frcpa(1+66/256))/2
+data8 0x3fbdedd437eaef01 //log(1/frcpa(1+67/256))/2
+data8 0x3fbe530effe71012 //log(1/frcpa(1+68/256))/2
+data8 0x3fbeb89a1648b971 //log(1/frcpa(1+69/256))/2
+data8 0x3fbf1e75fadf9bde //log(1/frcpa(1+70/256))/2
+data8 0x3fbf84a32ead7c35 //log(1/frcpa(1+71/256))/2
+data8 0x3fbfeb2233ea07cd //log(1/frcpa(1+72/256))/2
+data8 0x3fc028f9c7035c1c //log(1/frcpa(1+73/256))/2
+data8 0x3fc05c8be0d9635a //log(1/frcpa(1+74/256))/2
+data8 0x3fc085eb8f8ae797 //log(1/frcpa(1+75/256))/2
+data8 0x3fc0b9c8e32d1911 //log(1/frcpa(1+76/256))/2
+data8 0x3fc0edd060b78081 //log(1/frcpa(1+77/256))/2
+data8 0x3fc122024cf0063f //log(1/frcpa(1+78/256))/2
+data8 0x3fc14be2927aecd4 //log(1/frcpa(1+79/256))/2
+data8 0x3fc180618ef18adf //log(1/frcpa(1+80/256))/2
+data8 0x3fc1b50bbe2fc63b //log(1/frcpa(1+81/256))/2
+data8 0x3fc1df4cc7cf242d //log(1/frcpa(1+82/256))/2
+data8 0x3fc214456d0eb8d4 //log(1/frcpa(1+83/256))/2
+data8 0x3fc23ec5991eba49 //log(1/frcpa(1+84/256))/2
+data8 0x3fc2740d9f870afb //log(1/frcpa(1+85/256))/2
+data8 0x3fc29ecdabcdfa04 //log(1/frcpa(1+86/256))/2
+data8 0x3fc2d46602adccee //log(1/frcpa(1+87/256))/2
+data8 0x3fc2ff66b04ea9d4 //log(1/frcpa(1+88/256))/2
+data8 0x3fc335504b355a37 //log(1/frcpa(1+89/256))/2
+data8 0x3fc360925ec44f5d //log(1/frcpa(1+90/256))/2
+data8 0x3fc38bf1c3337e75 //log(1/frcpa(1+91/256))/2
+data8 0x3fc3c25277333184 //log(1/frcpa(1+92/256))/2
+data8 0x3fc3edf463c1683e //log(1/frcpa(1+93/256))/2
+data8 0x3fc419b423d5e8c7 //log(1/frcpa(1+94/256))/2
+data8 0x3fc44591e0539f49 //log(1/frcpa(1+95/256))/2
+data8 0x3fc47c9175b6f0ad //log(1/frcpa(1+96/256))/2
+data8 0x3fc4a8b341552b09 //log(1/frcpa(1+97/256))/2
+data8 0x3fc4d4f3908901a0 //log(1/frcpa(1+98/256))/2
+data8 0x3fc501528da1f968 //log(1/frcpa(1+99/256))/2
+data8 0x3fc52dd06347d4f6 //log(1/frcpa(1+100/256))/2
+data8 0x3fc55a6d3c7b8a8a //log(1/frcpa(1+101/256))/2
+data8 0x3fc5925d2b112a59 //log(1/frcpa(1+102/256))/2
+data8 0x3fc5bf406b543db2 //log(1/frcpa(1+103/256))/2
+data8 0x3fc5ec433d5c35ae //log(1/frcpa(1+104/256))/2
+data8 0x3fc61965cdb02c1f //log(1/frcpa(1+105/256))/2
+data8 0x3fc646a84935b2a2 //log(1/frcpa(1+106/256))/2
+data8 0x3fc6740add31de94 //log(1/frcpa(1+107/256))/2
+data8 0x3fc6a18db74a58c5 //log(1/frcpa(1+108/256))/2
+data8 0x3fc6cf31058670ec //log(1/frcpa(1+109/256))/2
+data8 0x3fc6f180e852f0ba //log(1/frcpa(1+110/256))/2
+data8 0x3fc71f5d71b894f0 //log(1/frcpa(1+111/256))/2
+data8 0x3fc74d5aefd66d5c //log(1/frcpa(1+112/256))/2
+data8 0x3fc77b79922bd37e //log(1/frcpa(1+113/256))/2
+data8 0x3fc7a9b9889f19e2 //log(1/frcpa(1+114/256))/2
+data8 0x3fc7d81b037eb6a6 //log(1/frcpa(1+115/256))/2
+data8 0x3fc8069e33827231 //log(1/frcpa(1+116/256))/2
+data8 0x3fc82996d3ef8bcb //log(1/frcpa(1+117/256))/2
+data8 0x3fc85855776dcbfb //log(1/frcpa(1+118/256))/2
+data8 0x3fc8873658327ccf //log(1/frcpa(1+119/256))/2
+data8 0x3fc8aa75973ab8cf //log(1/frcpa(1+120/256))/2
+data8 0x3fc8d992dc8824e5 //log(1/frcpa(1+121/256))/2
+data8 0x3fc908d2ea7d9512 //log(1/frcpa(1+122/256))/2
+data8 0x3fc92c59e79c0e56 //log(1/frcpa(1+123/256))/2
+data8 0x3fc95bd750ee3ed3 //log(1/frcpa(1+124/256))/2
+data8 0x3fc98b7811a3ee5b //log(1/frcpa(1+125/256))/2
+data8 0x3fc9af47f33d406c //log(1/frcpa(1+126/256))/2
+data8 0x3fc9df270c1914a8 //log(1/frcpa(1+127/256))/2
+data8 0x3fca0325ed14fda4 //log(1/frcpa(1+128/256))/2
+data8 0x3fca33440224fa79 //log(1/frcpa(1+129/256))/2
+data8 0x3fca57725e80c383 //log(1/frcpa(1+130/256))/2
+data8 0x3fca87d0165dd199 //log(1/frcpa(1+131/256))/2
+data8 0x3fcaac2e6c03f896 //log(1/frcpa(1+132/256))/2
+data8 0x3fcadccc6fdf6a81 //log(1/frcpa(1+133/256))/2
+data8 0x3fcb015b3eb1e790 //log(1/frcpa(1+134/256))/2
+data8 0x3fcb323a3a635948 //log(1/frcpa(1+135/256))/2
+data8 0x3fcb56fa04462909 //log(1/frcpa(1+136/256))/2
+data8 0x3fcb881aa659bc93 //log(1/frcpa(1+137/256))/2
+data8 0x3fcbad0bef3db165 //log(1/frcpa(1+138/256))/2
+data8 0x3fcbd21297781c2f //log(1/frcpa(1+139/256))/2
+data8 0x3fcc039236f08819 //log(1/frcpa(1+140/256))/2
+data8 0x3fcc28cb1e4d32fd //log(1/frcpa(1+141/256))/2
+data8 0x3fcc4e19b84723c2 //log(1/frcpa(1+142/256))/2
+data8 0x3fcc7ff9c74554c9 //log(1/frcpa(1+143/256))/2
+data8 0x3fcca57b64e9db05 //log(1/frcpa(1+144/256))/2
+data8 0x3fcccb130a5cebb0 //log(1/frcpa(1+145/256))/2
+data8 0x3fccf0c0d18f326f //log(1/frcpa(1+146/256))/2
+data8 0x3fcd232075b5a201 //log(1/frcpa(1+147/256))/2
+data8 0x3fcd490246defa6b //log(1/frcpa(1+148/256))/2
+data8 0x3fcd6efa918d25cd //log(1/frcpa(1+149/256))/2
+data8 0x3fcd9509707ae52f //log(1/frcpa(1+150/256))/2
+data8 0x3fcdbb2efe92c554 //log(1/frcpa(1+151/256))/2
+data8 0x3fcdee2f3445e4af //log(1/frcpa(1+152/256))/2
+data8 0x3fce148a1a2726ce //log(1/frcpa(1+153/256))/2
+data8 0x3fce3afc0a49ff40 //log(1/frcpa(1+154/256))/2
+data8 0x3fce6185206d516e //log(1/frcpa(1+155/256))/2
+data8 0x3fce882578823d52 //log(1/frcpa(1+156/256))/2
+data8 0x3fceaedd2eac990c //log(1/frcpa(1+157/256))/2
+data8 0x3fced5ac5f436be3 //log(1/frcpa(1+158/256))/2
+data8 0x3fcefc9326d16ab9 //log(1/frcpa(1+159/256))/2
+data8 0x3fcf2391a2157600 //log(1/frcpa(1+160/256))/2
+data8 0x3fcf4aa7ee03192d //log(1/frcpa(1+161/256))/2
+data8 0x3fcf71d627c30bb0 //log(1/frcpa(1+162/256))/2
+data8 0x3fcf991c6cb3b379 //log(1/frcpa(1+163/256))/2
+data8 0x3fcfc07ada69a910 //log(1/frcpa(1+164/256))/2
+data8 0x3fcfe7f18eb03d3e //log(1/frcpa(1+165/256))/2
+data8 0x3fd007c053c5002e //log(1/frcpa(1+166/256))/2
+data8 0x3fd01b942198a5a1 //log(1/frcpa(1+167/256))/2
+data8 0x3fd02f74400c64eb //log(1/frcpa(1+168/256))/2
+data8 0x3fd04360be7603ad //log(1/frcpa(1+169/256))/2
+data8 0x3fd05759ac47fe34 //log(1/frcpa(1+170/256))/2
+data8 0x3fd06b5f1911cf52 //log(1/frcpa(1+171/256))/2
+data8 0x3fd078bf0533c568 //log(1/frcpa(1+172/256))/2
+data8 0x3fd08cd9687e7b0e //log(1/frcpa(1+173/256))/2
+data8 0x3fd0a10074cf9019 //log(1/frcpa(1+174/256))/2
+data8 0x3fd0b5343a234477 //log(1/frcpa(1+175/256))/2
+data8 0x3fd0c974c89431ce //log(1/frcpa(1+176/256))/2
+data8 0x3fd0ddc2305b9886 //log(1/frcpa(1+177/256))/2
+data8 0x3fd0eb524bafc918 //log(1/frcpa(1+178/256))/2
+data8 0x3fd0ffb54213a476 //log(1/frcpa(1+179/256))/2
+data8 0x3fd114253da97d9f //log(1/frcpa(1+180/256))/2
+data8 0x3fd128a24f1d9aff //log(1/frcpa(1+181/256))/2
+data8 0x3fd1365252bf0865 //log(1/frcpa(1+182/256))/2
+data8 0x3fd14ae558b4a92d //log(1/frcpa(1+183/256))/2
+data8 0x3fd15f85a19c765b //log(1/frcpa(1+184/256))/2
+data8 0x3fd16d4d38c119fa //log(1/frcpa(1+185/256))/2
+data8 0x3fd18203c20dd133 //log(1/frcpa(1+186/256))/2
+data8 0x3fd196c7bc4b1f3b //log(1/frcpa(1+187/256))/2
+data8 0x3fd1a4a738b7a33c //log(1/frcpa(1+188/256))/2
+data8 0x3fd1b981c0c9653d //log(1/frcpa(1+189/256))/2
+data8 0x3fd1ce69e8bb106b //log(1/frcpa(1+190/256))/2
+data8 0x3fd1dc619de06944 //log(1/frcpa(1+191/256))/2
+data8 0x3fd1f160a2ad0da4 //log(1/frcpa(1+192/256))/2
+data8 0x3fd2066d7740737e //log(1/frcpa(1+193/256))/2
+data8 0x3fd2147dba47a394 //log(1/frcpa(1+194/256))/2
+data8 0x3fd229a1bc5ebac3 //log(1/frcpa(1+195/256))/2
+data8 0x3fd237c1841a502e //log(1/frcpa(1+196/256))/2
+data8 0x3fd24cfce6f80d9a //log(1/frcpa(1+197/256))/2
+data8 0x3fd25b2c55cd5762 //log(1/frcpa(1+198/256))/2
+data8 0x3fd2707f4d5f7c41 //log(1/frcpa(1+199/256))/2
+data8 0x3fd285e0842ca384 //log(1/frcpa(1+200/256))/2
+data8 0x3fd294294708b773 //log(1/frcpa(1+201/256))/2
+data8 0x3fd2a9a2670aff0c //log(1/frcpa(1+202/256))/2
+data8 0x3fd2b7fb2c8d1cc1 //log(1/frcpa(1+203/256))/2
+data8 0x3fd2c65a6395f5f5 //log(1/frcpa(1+204/256))/2
+data8 0x3fd2dbf557b0df43 //log(1/frcpa(1+205/256))/2
+data8 0x3fd2ea64c3f97655 //log(1/frcpa(1+206/256))/2
+data8 0x3fd3001823684d73 //log(1/frcpa(1+207/256))/2
+data8 0x3fd30e97e9a8b5cd //log(1/frcpa(1+208/256))/2
+data8 0x3fd32463ebdd34ea //log(1/frcpa(1+209/256))/2
+data8 0x3fd332f4314ad796 //log(1/frcpa(1+210/256))/2
+data8 0x3fd348d90e7464d0 //log(1/frcpa(1+211/256))/2
+data8 0x3fd35779f8c43d6e //log(1/frcpa(1+212/256))/2
+data8 0x3fd36621961a6a99 //log(1/frcpa(1+213/256))/2
+data8 0x3fd37c299f3c366a //log(1/frcpa(1+214/256))/2
+data8 0x3fd38ae2171976e7 //log(1/frcpa(1+215/256))/2
+data8 0x3fd399a157a603e7 //log(1/frcpa(1+216/256))/2
+data8 0x3fd3afccfe77b9d1 //log(1/frcpa(1+217/256))/2
+data8 0x3fd3be9d503533b5 //log(1/frcpa(1+218/256))/2
+data8 0x3fd3cd7480b4a8a3 //log(1/frcpa(1+219/256))/2
+data8 0x3fd3e3c43918f76c //log(1/frcpa(1+220/256))/2
+data8 0x3fd3f2acb27ed6c7 //log(1/frcpa(1+221/256))/2
+data8 0x3fd4019c2125ca93 //log(1/frcpa(1+222/256))/2
+data8 0x3fd4181061389722 //log(1/frcpa(1+223/256))/2
+data8 0x3fd42711518df545 //log(1/frcpa(1+224/256))/2
+data8 0x3fd436194e12b6bf //log(1/frcpa(1+225/256))/2
+data8 0x3fd445285d68ea69 //log(1/frcpa(1+226/256))/2
+data8 0x3fd45bcc464c893a //log(1/frcpa(1+227/256))/2
+data8 0x3fd46aed21f117fc //log(1/frcpa(1+228/256))/2
+data8 0x3fd47a1527e8a2d3 //log(1/frcpa(1+229/256))/2
+data8 0x3fd489445efffccc //log(1/frcpa(1+230/256))/2
+data8 0x3fd4a018bcb69835 //log(1/frcpa(1+231/256))/2
+data8 0x3fd4af5a0c9d65d7 //log(1/frcpa(1+232/256))/2
+data8 0x3fd4bea2a5bdbe87 //log(1/frcpa(1+233/256))/2
+data8 0x3fd4cdf28f10ac46 //log(1/frcpa(1+234/256))/2
+data8 0x3fd4dd49cf994058 //log(1/frcpa(1+235/256))/2
+data8 0x3fd4eca86e64a684 //log(1/frcpa(1+236/256))/2
+data8 0x3fd503c43cd8eb68 //log(1/frcpa(1+237/256))/2
+data8 0x3fd513356667fc57 //log(1/frcpa(1+238/256))/2
+data8 0x3fd522ae0738a3d8 //log(1/frcpa(1+239/256))/2
+data8 0x3fd5322e26867857 //log(1/frcpa(1+240/256))/2
+data8 0x3fd541b5cb979809 //log(1/frcpa(1+241/256))/2
+data8 0x3fd55144fdbcbd62 //log(1/frcpa(1+242/256))/2
+data8 0x3fd560dbc45153c7 //log(1/frcpa(1+243/256))/2
+data8 0x3fd5707a26bb8c66 //log(1/frcpa(1+244/256))/2
+data8 0x3fd587f60ed5b900 //log(1/frcpa(1+245/256))/2
+data8 0x3fd597a7977c8f31 //log(1/frcpa(1+246/256))/2
+data8 0x3fd5a760d634bb8b //log(1/frcpa(1+247/256))/2
+data8 0x3fd5b721d295f10f //log(1/frcpa(1+248/256))/2
+data8 0x3fd5c6ea94431ef9 //log(1/frcpa(1+249/256))/2
+data8 0x3fd5d6bb22ea86f6 //log(1/frcpa(1+250/256))/2
+data8 0x3fd5e6938645d390 //log(1/frcpa(1+251/256))/2
+data8 0x3fd5f673c61a2ed2 //log(1/frcpa(1+252/256))/2
+data8 0x3fd6065bea385926 //log(1/frcpa(1+253/256))/2
+data8 0x3fd6164bfa7cc06b //log(1/frcpa(1+254/256))/2
+data8 0x3fd62643fecf9743 //log(1/frcpa(1+255/256))/2
+LOCAL_OBJECT_END(atanhf_data2)
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(atanhf)
+
+{ .mfi
+ getf.exp rArgSExpb = f8 // sign and biased exponent of x
+ fclass.m p9,p0 = f8, 0x0b // is arg denormal ?
+ mov rExpbMask = 0x1ffff // mask that keeps the 17 exponent bits
+}
+{ .mfi
+ addl DataPtr = @ltoff(atanhf_data), gp // linkage table slot of atanhf_data
+ fnma.s1 fOneMx = f8, f1, f1 // 1 - x
+ mov rBias = 0xffff // exponent bias
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p0 = f8, 0xc7 // is arg NaN or +/-0 ?
+ mov rNearZeroBound = 0xffeb // biased exponent of 2^(-20)
+}
+{ .mfi
+ ld8 DataPtr = [DataPtr] // DataPtr = address of atanhf_data
+ fma.s1 fOnePx = f8, f1, f1 // 1 + x
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fnorm.s1 fNormX = f8 // Normalize x
+(p9) br.cond.spnt ATANH_UNORM // Branch if x=unorm
+}
+;;
+
+ATANH_COMMON:
+// Return here if x=unorm and not denorm
+{ .mfi
+ ldfpd fP3, fP2 = [DataPtr], 16 // Load P3, P2 and advance DataPtr
+ fma.s1 fX2 = f8, f8, f0 // x^2
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p7) fma.s.s0 f8 = f8,f1,f8 // NaN or +/-0
+(p7) br.ret.spnt b0 // Exit for NaN or +/-0
+}
+;;
+
+{ .mfi
+ ldfpd fP1, fHalf = [DataPtr], 16 // Load P1, Half and advance DataPtr
+ frcpa.s1 fRcpM, p9 = f1, fOneMx // rcpm = frcpa(1 - x)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp rExpbm = fOneMx // sign and exponent of (1 - x)
+ frcpa.s1 fRcpP, p0 = f1, fOnePx // rcpp = frcpa(1 + x)
+ // biased exponent
+ and rArgExpb = rArgSExpb, rExpbMask
+}
+;;
+
+{ .mmi
+ getf.exp rExpbp = fOnePx // sign and exponent of (1 + x)
+ // is |x| < 2^(-20) ?
+ cmp.gt p8,p0 = rNearZeroBound, rArgExpb
+ cmp.ge p6,p0 = rArgExpb, rBias // is |x| >= 1 ?
+}
+;;
+
+{ .mmb
+ getf.sig rSigm = fOneMx // significand of (1 - x)
+ nop.m 0
+(p6) br.cond.spnt atanhf_ge_one
+}
+;;
+
+{ .mfb
+ getf.sig rSigp = fOnePx // significand of (1 + x)
+(p8) fma.s.s0 f8 = fX2, f8, f8 // x + x^3
+(p8) br.ret.spnt b0 // Exit for MAX_DENORM_ABS < |x| < 2^-20
+}
+;;
+
+{ .mfi
+ ldfd fLog2 = [DataPtr], 16 // Load log2 constant; DataPtr is the table base below
+ fms.s1 fRm = fRcpM, fOneMx, f1 // rm = rcpm * (1 - x) - 1
+ nop.i 0
+}
+;;
+
+{ .mmf
+ // (1 - x) is always positive here and we need not mask sign bit
+ sub rNm = rExpbm, rBias
+ // (1 + x) is always positive here and we need not mask sign bit
+ sub rNp = rExpbp, rBias
+ fms.s1 fRp = fRcpP, fOnePx, f1 // rp = rcpp * (1 + x) - 1
+}
+;;
+
+{ .mmi
+ setf.sig fN4CvtM = rNm // Nm as integer, converted by fcvt.xf below
+ setf.sig fN4CvtP = rNp // Np as integer, converted by fcvt.xf below
+ extr.u rIndm = rSigm,55,8 // Extract 8 bits (significand bits 55..62)
+}
+;;
+
+{ .mmi
+ shladd RcpTablePtrM = rIndm, 3, DataPtr // table base + 8*index
+ nop.m 0
+ extr.u rIndp = rSigp,55,8 // Extract 8 bits (significand bits 55..62)
+}
+;;
+
+{ .mmi
+ ldfd fLogTm = [RcpTablePtrM] // table value Tm/2
+ shladd RcpTablePtrP = rIndp, 3, DataPtr // table base + 8*index
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfd fLogTp = [RcpTablePtrP] // table value Tp/2
+ fma.s1 fRm2 = fRm, fRm, f0 // rm^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP32m = fP3, fRm, fP2 // P3*rm + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fRp2 = fRp, fRp, f0 // rp^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP10m = fP1, fRm, fHalf // P1*rm + 1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP32p = fP3, fRp, fP2 // P3*rp + P2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP10p = fP1, fRp, fHalf // P1*rp + 1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcvt.xf fNm = fN4CvtM // Nm as floating point
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcvt.xf fNp = fN4CvtP // Np as floating point
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // (P3*rm + P2)*rm^2 + (P1*rm + 1)
+ fma.s1 fP32m = fP32m, fRm2, fP10m
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (P3*rp + P2)*rp^2 + (P1*rp + 1)
+ fma.s1 fP32p = fP32p, fRp2, fP10p
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // Nm*ln(2)/2 + Tm/2
+ fma.s1 fLogTm = fNm, fLog2, fLogTm
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Np*ln(2)/2 + Tp/2
+ fma.s1 fLogTp = fNp, fLog2, fLogTp
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // ((P3*rm + P2)*rm^2 + (P1*rm + 1))*0.5*rm + (Nm*ln(2)/2 + Tm/2)
+ fma.d.s1 fP32m = fP32m, fRm, fLogTm
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // ((P3*rp + P2)*rp^2 + (P1*rp + 1))*0.5*rp + (Np*ln(2)/2 + Tp/2)
+ fma.d.s1 fP32p = fP32p, fRp, fLogTp
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // atanhf(x) = 0.5 * (log(1 + x) - log(1 - x))
+ fnma.s.s0 f8 = fP32m, f1, fP32p
+ br.ret.sptk b0 // Exit for 2^(-20) <= |x| < 1.0
+}
+;;
+
+
+ATANH_UNORM:
+// Here if x=unorm; the sign predicates p10/p11 set below select the denorm result
+{ .mfi
+ getf.exp rArgSExpb = fNormX // Recompute if x unorm
+ fclass.m p0,p9 = fNormX, 0x0b // p9 = normalized x is not denorm
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fcmp.lt.s0 p10,p11 = f8, f0 // p10: x<0, p11: otherwise; sets denormal flag
+(p9) br.cond.sptk ATANH_COMMON // Continue if x unorm and not denorm
+}
+;;
+
+.pred.rel "mutex",p10,p11
+{ .mfi
+ nop.m 0
+(p10) fnma.s.s0 f8 = f8,f8,f8 // Result x-x^2 if x=-denorm
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p11) fma.s.s0 f8 = f8,f8,f8 // Result x+x^2 if x=+denorm
+ br.ret.spnt b0 // Exit if denorm
+}
+;;
+
+// Here if |x| >= 1.0: build the result, set the error tag, go to the error region
+atanhf_ge_one:
+{ .mfi
+ alloc r32 = ar.pfs,1,3,4,0 // frame: 1 input, 3 locals, 4 outputs
+ fmerge.s fArgAbs = f0, f8 // Form |x|
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.s f10 = f8, f8 // Save input for error call
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s1 p6,p7 = fArgAbs, f1 // Test for |x| = 1.0
+ nop.i 0
+}
+;;
+
+// Set error tag and result, and raise invalid flag if |x| > 1.0
+{ .mfi
+(p7) mov atanh_GR_tag = 133 // tag used when |x| > 1.0
+(p7) frcpa.s0 f8, p0 = f0, f0 // Get QNaN, and raise invalid
+ nop.i 0
+}
+;;
+
+// Set error tag and result, and raise Z flag if |x| = 1.0
+{ .mfi
+ nop.m 0
+(p6) frcpa.s0 fRm, p0 = f1, f0 // Get inf, and raise Z flag
+ nop.i 0
+}
+;;
+
+{ .mfb
+(p6) mov atanh_GR_tag = 134 // tag used when |x| = 1.0
+(p6) fmerge.s f8 = f8, fRm // result is +-inf
+ br.cond.sptk __libm_error_region // Exit if |x| >= 1.0
+}
+;;
+
+GLOBAL_LIBM_END(atanhf)
+libm_alias_float_other (atanh, atanh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfs [GR_Parameter_Y] = f1,16 // STORE Parameter 2 (f1) on stack, then Y = sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfs [GR_Parameter_X] = f10 // STORE Parameter 1 on stack
+ // Parameter 3 address (sp + 48)
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_atanhl.S b/sysdeps/ia64/fpu/e_atanhl.S
new file mode 100644
index 0000000000..5f45033029
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_atanhl.S
@@ -0,0 +1,1156 @@
+.file "atanhl.s"
+
+
+// Copyright (c) 2001 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,INCLUDING,BUT NOT
+// LIMITED TO,THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT,INDIRECT,INCIDENTAL,SPECIAL,
+// EXEMPLARY,OR CONSEQUENTIAL DAMAGES (INCLUDING,BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,DATA,OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY,WHETHER IN CONTRACT,STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE,EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code,and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 09/10/01 Initial version
+// 12/11/01 Corrected .restore syntax
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
+//
+//*********************************************************************
+//
+//*********************************************************************
+//
+// Function: atanhl(x) computes the principal value of the inverse
+// hyperbolic tangent of x.
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f33-f73
+//
+// General Purpose Registers:
+// r32-r52
+// r49-r52 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p15
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+// atanhl(inf) = QNaN
+// atanhl(-inf) = QNaN
+// atanhl(+/-0) = +/-0
+// atanhl(1) = +inf
+// atanhl(-1) = -inf
+// atanhl(|x|>1) = QNaN
+// atanhl(SNaN) = QNaN
+// atanhl(QNaN) = QNaN
+//
+//*********************************************************************
+//
+// Overview
+//
+// The method consists of two cases.
+//
+// If |x| < 1/32 use case atanhl_near_zero;
+// else use case atanhl_regular;
+//
+// Case atanhl_near_zero:
+//
+// atanhl(x) can be approximated by the Taylor series expansion
+// up to order 17.
+//
+// Case atanhl_regular:
+//
+// Here we use the formula atanhl(x) = sign(x)*log1pl(2*|x|/(1-|x|))/2, and
+// the calculation is subdivided into two stages. The first stage
+// computes X = 2*|x|/(1-|x|). The second stage computes
+// sign(x)*log1pl(X)/2. To obtain the required accuracy we use a precise
+// division algorithm whose output is a pair of extended precision values that
+// approximate the quotient with accuracy higher than working
+// precision. This pair is passed to a modified log1pl function.
+//
+//
+// 1. calculating of X = 2*|x|/(1-|x|)
+// ( based on Peter Markstein's "IA-64 and Elementary Functions" book )
+// ********************************************************************
+//
+// a = 2*|x|
+// b = 1 - |x|
+// b_lo = |x| - (1 - b)
+//
+// y = frcpa(b) initial approximation of 1/b
+// q = a*y initial approximation of a/b
+//
+// e = 1 - b*y
+// e2 = e + e^2
+// e1 = e^2
+// y1 = y + y*e2 = y + y*(e+e^2)
+//
+// e3 = e + e1^2
+// y2 = y + y1*e3 = y + y*(e+e^2+..+e^6)
+//
+// r = a - b*q
+// e4 = 1 - b*y2
+// X = q + r*y2 high part of a/b
+//
+// y3 = y2 + y2*e4
+// r1 = a - b*X
+// r1 = r1 - b_lo*X
+// X_lo = r1*y3 low part of a/b
+//
+// 2. special log1p algorithm overview
+// ***********************************
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute logl(Arg) = log1pl (Arg-1) for an argument Arg in [1,2),
+// we construct a value G such that G*Arg is close to 1 and that
+// logl(1/G) is obtainable easily from a table of values calculated
+// beforehand. Thus
+//
+// logl(Arg) = logl(1/G) + logl(G*Arg)
+// = logl(1/G) + logl(1 + (G*Arg - 1))
+//
+// Because |G*Arg - 1| is small, the second term on the right hand
+// side can be approximated by a short polynomial. We elaborate
+// this method in several steps.
+//
+// Step 0: Initialization
+// ------
+// We need to calculate logl(X + X_lo + 1). Obtain N, S_hi such that
+//
+// X + X_lo + 1 = 2^N * ( S_hi + S_lo ) exactly
+//
+// where S_hi in [1,2) and S_lo is a correction to S_hi in the sense
+// that |S_lo| <= ulp(S_hi).
+//
+// For the special version of log1p we add X_lo to S_lo (S_lo = S_lo + X_lo)
+// !-----------------------------------------------------------------------!
+//
+// Step 1: Argument Reduction
+// ------
+// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
+//
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1) + G * S_lo
+//
+// These G_j's have the property that the product is exactly
+// representable and that |r| < 2^(-12) as a result.
+//
+// Step 2: Approximation
+// ------
+// logl(1 + r) is approximated by a short polynomial poly(r).
+//
+// Step 3: Reconstruction
+// ------
+// Finally, log1pl(X + X_lo) = logl(X + X_lo + 1) is given by
+//
+// logl(X + X_lo + 1) = logl(2^N * (S_hi + S_lo))
+// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
+// ~=~ N*logl(2) + logl(1/G) + poly(r).
+//
+// For a detailed description see the log1pl function, regular path.
+//
+//*********************************************************************
+
+RODATA
+.align 64
+
+// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
+
+LOCAL_OBJECT_START(Constants_TaylorSeries)
+data8 0xF0F0F0F0F0F0F0F1,0x00003FFA // C17 ~ 1/17
+data8 0x8888888888888889,0x00003FFB // C15 ~ 1/15
+data8 0x9D89D89D89D89D8A,0x00003FFB // C13 ~ 1/13
+data8 0xBA2E8BA2E8BA2E8C,0x00003FFB // C11 ~ 1/11
+data8 0xE38E38E38E38E38E,0x00003FFB // C9 ~ 1/9
+data8 0x9249249249249249,0x00003FFC // C7 ~ 1/7
+data8 0xCCCCCCCCCCCCCCCD,0x00003FFC // C5 ~ 1/5
+data8 0xAAAAAAAAAAAAAAAA,0x00003FFD // C3 ~ 1/3
+data4 0x3f000000 // 1/2 (IEEE single), at offset 0x80
+data4 0x00000000 // pad to 16 bytes
+data4 0x00000000
+data4 0x00000000
+LOCAL_OBJECT_END(Constants_TaylorSeries)
+
+LOCAL_OBJECT_START(Constants_Q)
+data4 0x00000000,0xB1721800,0x00003FFE,0x00000000 // log2_hi: leading bits of ln(2)
+data4 0x4361C4C6,0x82E30865,0x0000BFE2,0x00000000 // log2_lo: ln(2) - log2_hi (negative)
+data4 0x328833CB,0xCCCCCAF2,0x00003FFC,0x00000000 // Q4 ~ 1/5
+data4 0xA9D4BAFB,0x80000077,0x0000BFFD,0x00000000 // Q3 ~ -1/4
+data4 0xAAABE3D2,0xAAAAAAAA,0x00003FFD,0x00000000 // Q2 ~ 1/3
+data4 0xFFFFDAB7,0xFFFFFFFF,0x0000BFFD,0x00000000 // Q1 ~ -1/2
+LOCAL_OBJECT_END(Constants_Q)
+
+
+// Z1 - 16 bit fixed point; Z1[i] ~ 2^15/(1 + i/16), i = 0..15
+LOCAL_OBJECT_START(Constants_Z_1)
+data4 0x00008000
+data4 0x00007879
+data4 0x000071C8
+data4 0x00006BCB
+data4 0x00006667
+data4 0x00006187
+data4 0x00005D18
+data4 0x0000590C
+data4 0x00005556
+data4 0x000051EC
+data4 0x00004EC5
+data4 0x00004BDB
+data4 0x00004925
+data4 0x0000469F
+data4 0x00004445
+data4 0x00004211
+LOCAL_OBJECT_END(Constants_Z_1)
+
+// G1 and H1 - IEEE single and h1 - IEEE double; G1[i] ~ 1/(1 + i/16), H1 ~ log(1/G1)
+LOCAL_OBJECT_START(Constants_G_H_h1)
+data4 0x3F800000,0x00000000
+data8 0x0000000000000000
+data4 0x3F70F0F0,0x3D785196
+data8 0x3DA163A6617D741C
+data4 0x3F638E38,0x3DF13843
+data8 0x3E2C55E6CBD3D5BB
+data4 0x3F579430,0x3E2FF9A0
+data8 0xBE3EB0BFD86EA5E7
+data4 0x3F4CCCC8,0x3E647FD6
+data8 0x3E2E6A8C86B12760
+data4 0x3F430C30,0x3E8B3AE7
+data8 0x3E47574C5C0739BA
+data4 0x3F3A2E88,0x3EA30C68
+data8 0x3E20E30F13E8AF2F
+data4 0x3F321640,0x3EB9CEC8
+data8 0xBE42885BF2C630BD
+data4 0x3F2AAAA8,0x3ECF9927
+data8 0x3E497F3497E577C6
+data4 0x3F23D708,0x3EE47FC5
+data8 0x3E3E6A6EA6B0A5AB
+data4 0x3F1D89D8,0x3EF8947D
+data8 0xBDF43E3CD328D9BE
+data4 0x3F17B420,0x3F05F3A1
+data8 0x3E4094C30ADB090A
+data4 0x3F124920,0x3F0F4303
+data8 0xBE28FBB2FC1FE510
+data4 0x3F0D3DC8,0x3F183EBF
+data8 0x3E3A789510FDE3FA
+data4 0x3F088888,0x3F20EC80
+data8 0x3E508CE57CC8C98F
+data4 0x3F042108,0x3F29516A
+data8 0xBE534874A223106C
+LOCAL_OBJECT_END(Constants_G_H_h1)
+
+// Z2 - 16 bit fixed point; Z2[i] ~ 2^15/(1 + i/256), i = 0..15
+LOCAL_OBJECT_START(Constants_Z_2)
+data4 0x00008000
+data4 0x00007F81
+data4 0x00007F02
+data4 0x00007E85
+data4 0x00007E08
+data4 0x00007D8D
+data4 0x00007D12
+data4 0x00007C98
+data4 0x00007C20
+data4 0x00007BA8
+data4 0x00007B31
+data4 0x00007ABB
+data4 0x00007A45
+data4 0x000079D1
+data4 0x0000795D
+data4 0x000078EB
+LOCAL_OBJECT_END(Constants_Z_2)
+
+// G2 and H2 - IEEE single and h2 - IEEE double; G2[i] ~ 1/(1 + i/256), H2 ~ log(1/G2)
+LOCAL_OBJECT_START(Constants_G_H_h2)
+data4 0x3F800000,0x00000000
+data8 0x0000000000000000
+data4 0x3F7F00F8,0x3B7F875D
+data8 0x3DB5A11622C42273
+data4 0x3F7E03F8,0x3BFF015B
+data8 0x3DE620CF21F86ED3
+data4 0x3F7D08E0,0x3C3EE393
+data8 0xBDAFA07E484F34ED
+data4 0x3F7C0FC0,0x3C7E0586
+data8 0xBDFE07F03860BCF6
+data4 0x3F7B1880,0x3C9E75D2
+data8 0x3DEA370FA78093D6
+data4 0x3F7A2328,0x3CBDC97A
+data8 0x3DFF579172A753D0
+data4 0x3F792FB0,0x3CDCFE47
+data8 0x3DFEBE6CA7EF896B
+data4 0x3F783E08,0x3CFC15D0
+data8 0x3E0CF156409ECB43
+data4 0x3F774E38,0x3D0D874D
+data8 0xBE0B6F97FFEF71DF
+data4 0x3F766038,0x3D1CF49B
+data8 0xBE0804835D59EEE8
+data4 0x3F757400,0x3D2C531D
+data8 0x3E1F91E9A9192A74
+data4 0x3F748988,0x3D3BA322
+data8 0xBE139A06BF72A8CD
+data4 0x3F73A0D0,0x3D4AE46F
+data8 0x3E1D9202F8FBA6CF
+data4 0x3F72B9D0,0x3D5A1756
+data8 0xBE1DCCC4BA796223
+data4 0x3F71D488,0x3D693B9D
+data8 0xBE049391B6B7C239
+LOCAL_OBJECT_END(Constants_G_H_h2)
+
+// G3 and H3 - IEEE single and h3 - IEEE double
+LOCAL_OBJECT_START(Constants_G_H_h3)
+data4 0x3F7FFC00,0x38800100
+data8 0x3D355595562224CD
+data4 0x3F7FF400,0x39400480
+data8 0x3D8200A206136FF6
+data4 0x3F7FEC00,0x39A00640
+data8 0x3DA4D68DE8DE9AF0
+data4 0x3F7FE400,0x39E00C41
+data8 0xBD8B4291B10238DC
+data4 0x3F7FDC00,0x3A100A21
+data8 0xBD89CCB83B1952CA
+data4 0x3F7FD400,0x3A300F22
+data8 0xBDB107071DC46826
+data4 0x3F7FCC08,0x3A4FF51C
+data8 0x3DB6FCB9F43307DB
+data4 0x3F7FC408,0x3A6FFC1D
+data8 0xBD9B7C4762DC7872
+data4 0x3F7FBC10,0x3A87F20B
+data8 0xBDC3725E3F89154A
+data4 0x3F7FB410,0x3A97F68B
+data8 0xBD93519D62B9D392
+data4 0x3F7FAC18,0x3AA7EB86
+data8 0x3DC184410F21BD9D
+data4 0x3F7FA420,0x3AB7E101
+data8 0xBDA64B952245E0A6
+data4 0x3F7F9C20,0x3AC7E701
+data8 0x3DB4B0ECAABB34B8
+data4 0x3F7F9428,0x3AD7DD7B
+data8 0x3D9923376DC40A7E
+data4 0x3F7F8C30,0x3AE7D474
+data8 0x3DC6E17B4F2083D3
+data4 0x3F7F8438,0x3AF7CBED
+data8 0x3DAE314B811D4394
+data4 0x3F7F7C40,0x3B03E1F3
+data8 0xBDD46F21B08F2DB1
+data4 0x3F7F7448,0x3B0BDE2F
+data8 0xBDDC30A46D34522B
+data4 0x3F7F6C50,0x3B13DAAA
+data8 0x3DCB0070B1F473DB
+data4 0x3F7F6458,0x3B1BD766
+data8 0xBDD65DDC6AD282FD
+data4 0x3F7F5C68,0x3B23CC5C
+data8 0xBDCDAB83F153761A
+data4 0x3F7F5470,0x3B2BC997
+data8 0xBDDADA40341D0F8F
+data4 0x3F7F4C78,0x3B33C711
+data8 0x3DCD1BD7EBC394E8
+data4 0x3F7F4488,0x3B3BBCC6
+data8 0xBDC3532B52E3E695
+data4 0x3F7F3C90,0x3B43BAC0
+data8 0xBDA3961EE846B3DE
+data4 0x3F7F34A0,0x3B4BB0F4
+data8 0xBDDADF06785778D4
+data4 0x3F7F2CA8,0x3B53AF6D
+data8 0x3DCC3ED1E55CE212
+data4 0x3F7F24B8,0x3B5BA620
+data8 0xBDBA31039E382C15
+data4 0x3F7F1CC8,0x3B639D12
+data8 0x3D635A0B5C5AF197
+data4 0x3F7F14D8,0x3B6B9444
+data8 0xBDDCCB1971D34EFC
+data4 0x3F7F0CE0,0x3B7393BC
+data8 0x3DC7450252CD7ADA
+data4 0x3F7F04F0,0x3B7B8B6D
+data8 0xBDB68F177D7F2A42
+LOCAL_OBJECT_END(Constants_G_H_h3)
+
+
+
+// Floating Point Registers
+
+FR_C17 = f50
+FR_C15 = f51
+FR_C13 = f52
+FR_C11 = f53
+FR_C9 = f54
+FR_C7 = f55
+FR_C5 = f56
+FR_C3 = f57
+FR_x2 = f58
+FR_x3 = f59
+FR_x4 = f60
+FR_x8 = f61
+
+FR_Rcp = f61
+
+FR_A = f33
+FR_R1 = f33
+
+FR_E1 = f34
+FR_E3 = f34
+FR_Y2 = f34
+FR_Y3 = f34
+
+FR_E2 = f35
+FR_Y1 = f35
+
+FR_B = f36
+FR_Y0 = f37
+FR_E0 = f38
+FR_E4 = f39
+FR_Q0 = f40
+FR_R0 = f41
+FR_B_lo = f42
+
+FR_abs_x = f43
+FR_Bp = f44
+FR_Bn = f45
+FR_Yp = f46
+FR_Yn = f47
+
+FR_X = f48
+FR_BB = f48
+FR_X_lo = f49
+
+FR_G = f50
+FR_Y_hi = f51
+FR_H = f51
+FR_h = f52
+FR_G2 = f53
+FR_H2 = f54
+FR_h2 = f55
+FR_G3 = f56
+FR_H3 = f57
+FR_h3 = f58
+
+FR_Q4 = f59
+FR_poly_lo = f59
+FR_Y_lo = f59
+
+FR_Q3 = f60
+FR_Q2 = f61
+
+FR_Q1 = f62
+FR_poly_hi = f62
+
+FR_float_N = f63
+
+FR_AA = f64
+FR_S_lo = f64
+
+FR_S_hi = f65
+FR_r = f65
+
+FR_log2_hi = f66
+FR_log2_lo = f67
+FR_Z = f68
+FR_2_to_minus_N = f69
+FR_rcub = f70
+FR_rsq = f71
+FR_05r = f72
+FR_Half = f73
+
+FR_Arg_X = f50
+FR_Arg_Y = f0
+FR_RESULT = f8
+
+
+
+// General Purpose Registers
+
+GR_ad_05 = r33
+GR_Index1 = r34
+GR_ArgExp = r34
+GR_Index2 = r35
+GR_ExpMask = r35
+GR_NearZeroBound = r36
+GR_signif = r36
+GR_X_0 = r37
+GR_X_1 = r37
+GR_X_2 = r38
+GR_Index3 = r38
+GR_minus_N = r39
+GR_Z_1 = r40
+GR_Z_2 = r40
+GR_N = r41
+GR_Bias = r42
+GR_M = r43
+GR_ad_taylor = r44
+GR_ad_taylor_2 = r45
+GR_ad2_tbl_3 = r45
+GR_ad_tbl_1 = r46
+GR_ad_tbl_2 = r47
+GR_ad_tbl_3 = r48
+GR_ad_q = r49
+GR_ad_z_1 = r50
+GR_ad_z_2 = r51
+GR_ad_z_3 = r52
+
+//
+// Added for unwind support
+//
+GR_SAVE_PFS = r46
+GR_SAVE_B0 = r47
+GR_SAVE_GP = r48
+GR_Parameter_X = r49
+GR_Parameter_Y = r50
+GR_Parameter_RESULT = r51
+GR_Parameter_TAG = r52
+
+
+
+.section .text
+GLOBAL_LIBM_ENTRY(atanhl)
+
+{ .mfi
+ alloc r32 = ar.pfs,0,17,4,0
+ fnma.s1 FR_Bp = f8,f1,f1 // b = 1 - |arg| (for x>0)
+ mov GR_ExpMask = 0x1ffff // mask for the 17-bit exponent field
+}
+{ .mfi
+ addl GR_ad_taylor = @ltoff(Constants_TaylorSeries),gp
+ fma.s1 FR_Bn = f8,f1,f1 // b = 1 - |arg| (for x<0)
+ mov GR_NearZeroBound = 0xfffa // biased exp of 1/32
+};;
+{ .mfi
+ getf.exp GR_ArgExp = f8
+ fcmp.lt.s1 p6,p7 = f8,f0 // is negative?
+ nop.i 0
+}
+{ .mfi
+ ld8 GR_ad_taylor = [GR_ad_taylor] // address of Constants_TaylorSeries
+ fmerge.s FR_abs_x = f1,f8 // |arg|
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fclass.m p8,p0 = f8,0x1C7 // is arg NaT,Q/SNaN or +/-0 ?
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_x2 = f8,f8,f0 // x^2 (consumed by atanhl_near_zero)
+ nop.i 0
+};;
+{ .mfi
+ add GR_ad_z_1 = 0x0F0,GR_ad_taylor
+ fclass.m p9,p0 = f8,0x0a // is arg -denormal ?
+ add GR_ad_taylor_2 = 0x010,GR_ad_taylor
+}
+{ .mfi
+ add GR_ad_05 = 0x080,GR_ad_taylor
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ ldfe FR_C17 = [GR_ad_taylor],32
+ fclass.m p10,p0 = f8,0x09 // is arg +denormal ?
+ add GR_ad_tbl_1 = 0x040,GR_ad_z_1 // point to Constants_G_H_h1
+}
+{ .mfb
+ add GR_ad_z_2 = 0x140,GR_ad_z_1 // point to Constants_Z_2
+ (p8) fma.s0 f8 = f8,f1,f0 // NaN or +/-0
+ (p8) br.ret.spnt b0 // exit for Nan or +/-0
+};;
+{ .mfi
+ ldfe FR_C15 = [GR_ad_taylor_2],32
+ fclass.m p15,p0 = f8,0x23 // is +/-INF ?
+ add GR_ad_tbl_2 = 0x180,GR_ad_z_1 // point to Constants_G_H_h2
+}
+{ .mfb
+ ldfe FR_C13 = [GR_ad_taylor],32
+ (p9) fnma.s0 f8 = f8,f8,f8 // -denormal
+ (p9) br.ret.spnt b0 // exit for -denormal
+};;
+{ .mfi
+ ldfe FR_C11 = [GR_ad_taylor_2],32
+ fcmp.eq.s0 p13,p0 = FR_abs_x,f1 // is |arg| = 1?
+ nop.i 0
+}
+{ .mfb
+ ldfe FR_C9 = [GR_ad_taylor],32
+(p10) fma.s0 f8 = f8,f8,f8 // +denormal
+(p10) br.ret.spnt b0 // exit for +denormal
+};;
+{ .mfi
+ ldfe FR_C7 = [GR_ad_taylor_2],32
+ (p6) frcpa.s1 FR_Yn,p11 = f1,FR_Bn // y = frcpa(b)
+ and GR_ArgExp = GR_ArgExp,GR_ExpMask // biased exponent
+}
+{ .mfb
+ ldfe FR_C5 = [GR_ad_taylor],32
+ fnma.s1 FR_B = FR_abs_x,f1,f1 // b = 1 - |arg|
+(p15) br.cond.spnt atanhl_gt_one // arg is +/-INF
+};;
+{ .mfb
+ cmp.gt p14,p0 = GR_NearZeroBound,GR_ArgExp
+ (p7) frcpa.s1 FR_Yp,p12 = f1,FR_Bp // y = frcpa(b)
+(p13) br.cond.spnt atanhl_eq_one // |arg| = 1
+}
+{ .mfb
+ ldfe FR_C3 = [GR_ad_taylor_2],32
+ fma.s1 FR_A = FR_abs_x,f1,FR_abs_x // a = 2 * |arg|
+(p14) br.cond.spnt atanhl_near_zero // |arg| < 1/32
+};;
+{ .mfi
+ nop.m 0
+ fcmp.gt.s0 p8,p0 = FR_abs_x,f1 // is |arg| > 1 ?
+ nop.i 0
+};;
+.pred.rel "mutex",p6,p7
+{ .mfi
+ nop.m 0
+ (p6) fnma.s1 FR_B_lo = FR_Bn,f1,f1 // argt = 1 - (1 - |arg|)
+ nop.i 0
+}
+{ .mfi
+ ldfs FR_Half = [GR_ad_05]
+ (p7) fnma.s1 FR_B_lo = FR_Bp,f1,f1 // argt = 1 - (1 - |arg|)
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ (p6) fnma.s1 FR_E0 = FR_Yn,FR_Bn,f1 // e = 1-b*y
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ (p6) fma.s1 FR_Y0 = FR_Yn,f1,f0 // y0 = y
+ (p8) br.cond.spnt atanhl_gt_one // |arg| > 1
+};;
+{ .mfi
+ nop.m 0
+ (p7) fnma.s1 FR_E0 = FR_Yp,FR_Bp,f1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ (p6) fma.s1 FR_Q0 = FR_A,FR_Yn,f0 // q = a*y
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ (p7) fma.s1 FR_Q0 = FR_A,FR_Yp,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ (p7) fma.s1 FR_Y0 = FR_Yp,f1,f0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fclass.nm p10,p0 = f8,0x1FF // test for unsupported
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_E2 = FR_E0,FR_E0,FR_E0 // e2 = e+e^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_E1 = FR_E0,FR_E0,f0 // e1 = e^2
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+// Return generated NaN or other value for unsupported values.
+(p10) fma.s0 f8 = f8, f0, f0
+(p10) br.ret.spnt b0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y1 = FR_Y0,FR_E2,FR_Y0 // y1 = y+y*e2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_E3 = FR_E1,FR_E1,FR_E0 // e3 = e+e1^2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_B_lo = FR_abs_x,f1,FR_B_lo // b_lo = argt-|arg|
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y2 = FR_Y1,FR_E3,FR_Y0 // y2 = y+y1*e3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_R0 = FR_B,FR_Q0,FR_A // r = a-b*q
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_E4 = FR_B,FR_Y2,f1 // e4 = 1-b*y2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_X = FR_R0,FR_Y2,FR_Q0 // x = q+r*y2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Z = FR_X,f1,f1 // x+1
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ (p6) fnma.s1 FR_Half = FR_Half,f1,f0 // sign(arg)/2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_Y3 = FR_Y2,FR_E4,FR_Y2 // y3 = y2+y2*e4
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_R1 = FR_B,FR_X,FR_A // r1 = a-b*x
+ nop.i 0
+};;
+{ .mfi
+ getf.sig GR_signif = FR_Z // get significand of x+1
+ nop.f 0
+ nop.i 0
+};;
+
+
+{ .mfi
+ add GR_ad_q = -0x060,GR_ad_z_1
+ nop.f 0
+ extr.u GR_Index1 = GR_signif,59,4 // get high 4 bits of signif
+}
+{ .mfi
+ add GR_ad_tbl_3 = 0x280,GR_ad_z_1 // point to Constants_G_H_h3
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ shladd GR_ad_z_1 = GR_Index1,2,GR_ad_z_1 // point to Z_1
+ nop.f 0
+ extr.u GR_X_0 = GR_signif,49,15 // get high 15 bits of significand
+};;
+{ .mfi
+ ld4 GR_Z_1 = [GR_ad_z_1] // load Z_1
+ fmax.s1 FR_AA = FR_X,f1 // for S_lo,form AA = max(X,1.0)
+ nop.i 0
+}
+{ .mfi
+ shladd GR_ad_tbl_1 = GR_Index1,4,GR_ad_tbl_1 // point to G_1
+ nop.f 0
+ mov GR_Bias = 0x0FFFF // exponent bias
+};;
+{ .mfi
+ ldfps FR_G,FR_H = [GR_ad_tbl_1],8 // load G_1,H_1
+ fmerge.se FR_S_hi = f1,FR_Z // form |x+1|
+ nop.i 0
+};;
+{ .mfi
+ getf.exp GR_N = FR_Z // get N = exponent of x+1
+ nop.f 0
+ nop.i 0
+}
+{ .mfi
+ ldfd FR_h = [GR_ad_tbl_1] // load h_1
+ fnma.s1 FR_R1 = FR_B_lo,FR_X,FR_R1 // r1 = r1-b_lo*x
+ nop.i 0
+};;
+{ .mfi
+ ldfe FR_log2_hi = [GR_ad_q],16 // load log2_hi
+ nop.f 0
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // get bits 30-15 of X_0 * Z_1
+};;
+//
+// For performance,don't use result of pmpyshr2.u for 4 cycles.
+//
+{ .mfi
+ ldfe FR_log2_lo = [GR_ad_q],16 // load log2_lo
+ nop.f 0
+ sub GR_N = GR_N,GR_Bias
+};;
+{ .mfi
+ ldfe FR_Q4 = [GR_ad_q],16 // load Q4
+ fms.s1 FR_S_lo = FR_AA,f1,FR_Z // form S_lo = AA - Z
+ sub GR_minus_N = GR_Bias,GR_N // form exponent of 2^(-N)
+};;
+{ .mmf
+ ldfe FR_Q3 = [GR_ad_q],16 // load Q3
+ // put integer N into rightmost significand
+ setf.sig FR_float_N = GR_N
+ fmin.s1 FR_BB = FR_X,f1 // for S_lo,form BB = min(X,1.0)
+};;
+{ .mfi
+ ldfe FR_Q2 = [GR_ad_q],16 // load Q2
+ nop.f 0
+ extr.u GR_Index2 = GR_X_1,6,4 // extract bits 6-9 of X_1
+};;
+{ .mmi
+ ldfe FR_Q1 = [GR_ad_q] // load Q1
+ shladd GR_ad_z_2 = GR_Index2,2,GR_ad_z_2 // point to Z_2
+ nop.i 0
+};;
+{ .mmi
+ ld4 GR_Z_2 = [GR_ad_z_2] // load Z_2
+ shladd GR_ad_tbl_2 = GR_Index2,4,GR_ad_tbl_2 // point to G_2
+ nop.i 0
+};;
+{ .mfi
+ ldfps FR_G2,FR_H2 = [GR_ad_tbl_2],8 // load G_2,H_2
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ ldfd FR_h2 = [GR_ad_tbl_2] // load h_2
+ fma.s1 FR_S_lo = FR_S_lo,f1,FR_BB // S_lo = S_lo + BB
+ nop.i 0
+}
+{ .mfi
+ setf.exp FR_2_to_minus_N = GR_minus_N // form 2^(-N)
+ fma.s1 FR_X_lo = FR_R1,FR_Y3,f0 // x_lo = r1*y3
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ nop.f 0
+ pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // get bits 30-15 of X_1 * Z_2
+};;
+//
+// For performance,don't use result of pmpyshr2.u for 4 cycles
+//
+{ .mfi
+ add GR_ad2_tbl_3 = 8,GR_ad_tbl_3
+ nop.f 0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+
+//
+// Now GR_X_2 can be used
+//
+{ .mfi
+ nop.m 0
+ nop.f 0
+ extr.u GR_Index3 = GR_X_2,1,5 // extract bits 1-5 of X_2
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_S_lo = FR_S_lo,f1,FR_X_lo // S_lo = S_lo + Arg_lo
+ nop.i 0
+};;
+
+{ .mfi
+ shladd GR_ad_tbl_3 = GR_Index3,4,GR_ad_tbl_3 // point to G_3
+ fcvt.xf FR_float_N = FR_float_N
+ nop.i 0
+}
+{ .mfi
+ shladd GR_ad2_tbl_3 = GR_Index3,4,GR_ad2_tbl_3 // point to h_3
+ fma.s1 FR_Q1 = FR_Q1,FR_Half,f0 // sign(arg)*Q1/2
+ nop.i 0
+};;
+{ .mmi
+ ldfps FR_G3,FR_H3 = [GR_ad_tbl_3],8 // load G_3,H_3
+ ldfd FR_h3 = [GR_ad2_tbl_3] // load h_3
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_G = FR_G,FR_G2 // G = G_1 * G_2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_H = FR_H,FR_H2 // H = H_1 + H_2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_h = FR_h,FR_h2 // h = h_1 + h_2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ // S_lo = S_lo * 2^(-N)
+ fma.s1 FR_S_lo = FR_S_lo,FR_2_to_minus_N,f0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_G = FR_G,FR_G3 // G = (G_1 * G_2) * G_3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_H = FR_H,FR_H3 // H = (H_1 + H_2) + H_3
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fadd.s1 FR_h = FR_h,FR_h3 // h = (h_1 + h_2) + h_3
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fms.s1 FR_r = FR_G,FR_S_hi,f1 // r = G * S_hi - 1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Y_hi = N * log2_hi + H
+ fma.s1 FR_Y_hi = FR_float_N,FR_log2_hi,FR_H
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_h = FR_float_N,FR_log2_lo,FR_h // h = N * log2_lo + h
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_r = FR_G,FR_S_lo,FR_r // r = G * S_lo + (G * S_hi - 1)
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_poly_lo = FR_r,FR_Q4,FR_Q3 // poly_lo = r * Q4 + Q3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fmpy.s1 FR_rsq = FR_r,FR_r // rsq = r * r
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_05r = FR_r,FR_Half,f0 // sign(arg)*r/2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ // poly_lo = poly_lo * r + Q2
+ fma.s1 FR_poly_lo = FR_poly_lo,FR_r,FR_Q2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_rcub = FR_rsq,FR_r,f0 // rcub = r^3
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ // poly_hi = sign(arg)*(Q1*r^2 + r)/2
+ fma.s1 FR_poly_hi = FR_Q1,FR_rsq,FR_05r
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ // poly_lo = poly_lo*r^3 + h
+ fma.s1 FR_poly_lo = FR_poly_lo,FR_rcub,FR_h
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ // Y_lo = poly_hi + poly_lo/2
+ fma.s0 FR_Y_lo = FR_poly_lo,FR_Half,FR_poly_hi
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+ // Result = arctanh(x) = Y_hi/2 + Y_lo
+ fma.s0 f8 = FR_Y_hi,FR_Half,FR_Y_lo
+ br.ret.sptk b0
+};;
+
+// Taylor series path for |arg| < 1/32: result = x + x^3 * P(x^2)
+atanhl_near_zero:
+{ .mfi
+ nop.m 0
+ fma.s1 FR_x3 = FR_x2,f8,f0 // x^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_x4 = FR_x2,FR_x2,f0 // x^4
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C17 = FR_C17,FR_x2,FR_C15 // C17*x^2 + C15
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C13 = FR_C13,FR_x2,FR_C11 // C13*x^2 + C11
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C9 = FR_C9,FR_x2,FR_C7 // C9*x^2 + C7
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C5 = FR_C5,FR_x2,FR_C3 // C5*x^2 + C3
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_x8 = FR_x4,FR_x4,f0 // x^8
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C17 = FR_C17,FR_x4,FR_C13 // combine the C17..C11 terms
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C9 = FR_C9,FR_x4,FR_C5 // combine the C9..C3 terms
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_C17 = FR_C17,FR_x8,FR_C9 // P(x^2) = high part * x^8 + low part
+ nop.i 0
+};;
+{ .mfb
+ nop.m 0
+ fma.s0 f8 = FR_C17,FR_x3,f8 // result = x + x^3 * P(x^2)
+ br.ret.sptk b0
+};;
+
+atanhl_eq_one:
+{ .mfi
+ nop.m 0
+ frcpa.s0 FR_Rcp,p0 = f1,f0 // get inf,and raise Z flag
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fmerge.s FR_Arg_X = f8, f8 // keep the argument for __libm_error_region
+ nop.i 0
+};;
+{ .mfb
+ mov GR_Parameter_TAG = 130 // tag for the |x| = 1 case (meaning defined by __libm_error_support)
+ fmerge.s FR_RESULT = f8,FR_Rcp // result is +-inf
+ br.cond.sptk __libm_error_region // exit if |x| = 1.0
+};;
+
+atanhl_gt_one:
+{ .mfi
+ nop.m 0
+ fmerge.s FR_Arg_X = f8, f8 // keep the argument for __libm_error_region
+ nop.i 0
+};;
+{ .mfb
+ mov GR_Parameter_TAG = 129 // tag for the |x| > 1 case (meaning defined by __libm_error_support)
+ frcpa.s0 FR_RESULT,p0 = f0,f0 // get QNaN,and raise invalid
+ br.cond.sptk __libm_error_region // exit if |x| > 1.0
+};;
+
+GLOBAL_LIBM_END(atanhl)
+libm_alias_ldouble_other (atanh, atanh)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Arg_Y,16 // Save Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0,GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_Arg_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region#)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_cosh.S b/sysdeps/ia64/fpu/e_cosh.S
new file mode 100644
index 0000000000..07bf9f6587
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_cosh.S
@@ -0,0 +1,866 @@
+.file "cosh.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/07/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/15/02 Improved speed with new algorithm
+// 03/31/05 Reformatted delimiters between data tables
+
+// API
+//==============================================================
+// double cosh(double)
+
+// Overview of operation
+//==============================================================
+// Case 1: 0 < |x| < 0.25
+// Evaluate cosh(x) by a 12th order polynomial
+// Care is taken for the order of multiplication; and A2 is not exactly 1/4!,
+// A3 is not exactly 1/6!, etc.
+// cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8 + A5*x^10 + A6*x^12)
+//
+// Case 2: 0.25 < |x| < 710.47586
+// Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2.
+// The algorithm for exp is described as below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
+//
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
+
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by 5th order polynomial
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
+
+
+// Special values
+//==============================================================
+// cosh(+0) = 1.0
+// cosh(-0) = 1.0
+
+// cosh(+qnan) = +qnan
+// cosh(-qnan) = -qnan
+// cosh(+snan) = +qnan
+// cosh(-snan) = -qnan
+
+// cosh(-inf) = +inf
+// cosh(+inf) = +inf
+
+// Overflow and Underflow
+//=======================
+// cosh(x) = largest double normal when
+// x = 710.47586 = 0x408633ce8fb9f87d
+//
+// There is no underflow.
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6 -> f15, f32 -> f61
+
+// General registers used:
+// r14 -> r40
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//==============================================================
+
+rRshf = r14
+rN_neg = r14
+rAD_TB1 = r15
+rAD_TB2 = r16
+rAD_P = r17
+rN = r18
+rIndex_1 = r19
+rIndex_2_16 = r20
+rM = r21
+rBiased_M = r21
+rSig_inv_ln2 = r22
+rIndex_1_neg = r22
+rExp_bias = r23
+rExp_bias_minus_1 = r23
+rExp_mask = r24 // r24 carries four different names (this one and the next three)
+rTmp = r24
+rGt_ln = r24
+rIndex_2_16_neg = r24
+rM_neg = r25
+rBiased_M_neg = r25
+rRshf_2to56 = r26
+rAD_T1_neg = r26
+rExp_2tom56 = r28
+rAD_T2_neg = r28
+rAD_T1 = r29
+rAD_T2 = r30
+rSignexp_x = r31
+rExp_x = r31
+
+GR_SAVE_B0 = r33 // r33-r36: locals of the alloc in COSH_CERTAIN_OVERFLOW
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37 // r37-r40: outputs of the alloc in COSH_CERTAIN_OVERFLOW
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP4 = f13
+fP3 = f14
+fP2 = f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fNormX = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+f2M = f41
+fS1 = f42
+fT1 = f42
+fS2 = f43
+fT2 = f43
+fS = f43
+fWre_urm_f8 = f44
+fAbsX = f44
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_NORM_ARG = f46
+fXsq = f47
+fX4 = f48
+fGt_pln = f49
+fTmp = f49
+
+fP54 = f50
+fP5432 = f50
+fP32 = f51
+fP = f52
+fP54_neg = f53
+fP5432_neg = f53
+fP32_neg = f54
+fP_neg = f55
+fF_neg = f56
+
+f2M_neg = f57
+fS1_neg = f58
+fT1_neg = f58
+fS2_neg = f59
+fT2_neg = f59
+fS_neg = f59
+fExp = f60
+fExp_neg = f61
+
+fA6 = f50 // fA*: COSH_SMALL coefficients, sharing f50-f55 with the fP* names
+fA65 = f50
+fA6543 = f50
+fA654321 = f50
+fA5 = f51
+fA4 = f52
+fA43 = f52
+fA3 = f53
+fA2 = f54
+fA21 = f54
+fA1 = f55
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 16
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is fNfloat.
+
+
+LOCAL_OBJECT_START(exp_table_1)
+data8 0x408633ce8fb9f87e // smallest dbl overflow arg
+data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+//
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+//
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+LOCAL_OBJECT_START(exp_table_2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_2)
+
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5
+data8 0x3fa55555d787761c //P4
+data8 0x3fc5555555555414 //P3
+data8 0x3fdffffffffffd6a //P2
+LOCAL_OBJECT_END(exp_p_table)
+
+LOCAL_OBJECT_START(cosh_p_table)
+data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // A6
+data8 0xD00D00D1021D7370, 0x00003FEF // A4
+data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // A2
+data8 0x93F27740C0C2F1CC, 0x00003FE9 // A5
+data8 0xB60B60B60B4FE884, 0x00003FF5 // A3
+data8 0x8000000000000000, 0x00003FFE // A1
+LOCAL_OBJECT_END(cosh_p_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(cosh)
+
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ addl rAD_TB1 = @ltoff(exp_table_1), gp
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
+}
+;;
+
+{ .mfi
+ ld8 rAD_TB1 = [rAD_TB1]
+ fclass.m p6,p0 = f8,0x0b // Test for x=unorm
+ mov rExp_mask = 0x1ffff
+}
+{ .mfi
+ mov rExp_bias = 0xffff
+ fnorm.s1 fNormX = f8
+ mov rExp_2tom56 = 0xffff-56
+}
+;;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
+
+{ .mfi
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ nop.i 999
+}
+{ .mlx
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16
+ fclass.m p10,p0 = f8,0x1e3 // Test for x=inf, nan, NaT
+ nop.i 0
+}
+{ .mfb
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+ nop.f 0
+(p6) br.cond.spnt COSH_UNORM // Branch if x=unorm
+}
+;;
+
+COSH_COMMON:
+{ .mfi
+ ldfe fLn2_by_128_hi = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+(p8) fma.d.s0 f8 = f1,f1,f0 // quick exit for x=0
+(p8) br.ret.spnt b0
+}
+;;
+
+{ .mfi
+ ldfe fLn2_by_128_lo = [rAD_TB1],16
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+(p10) fma.d.s0 f8 = f8,f8,f0 // Result if x=inf, nan, NaT
+(p10) br.ret.spnt b0 // quick exit for x=inf, nan, NaT
+}
+;;
+
+// After that last load rAD_TB1 points to the beginning of table 1
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
+}
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt COSH_SMALL // Branch if 0 < |x| < 2^-2
+}
+;;
+
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
+
+{ .mfi
+ add rAD_P = 0x180, rAD_TB1
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ add rAD_TB2 = 0x100, rAD_TB1
+}
+;;
+
+// Divide arguments into the following categories:
+// Certain Safe - 0.25 <= |x| <= MAX_DBL_NORM_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
+//
+// If the input is really a double arg, then there will never be
+// "Possible Overflow" arguments.
+//
+
+{ .mfi
+ ldfpd fP5, fP4 = [rAD_P] ,16
+ fcmp.ge.s1 p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
+}
+;;
+
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into rN.
+
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
+
+{ .mfi
+ ldfpd fP3, fP2 = [rAD_P]
+(p14) fcmp.gt.unc.s1 p14,p0 = fAbsX,fMAX_DBL_NORM_ARG
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
+(p15) br.cond.spnt COSH_CERTAIN_OVERFLOW
+}
+;;
+
+{ .mfi
+ getf.sig rN = fW_2TO56_RSH
+ nop.f 0
+ mov rExp_bias_minus_1 = 0xfffe
+}
+;;
+
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has M + (bias - 1), presumably folding in the /2 of cosh
+
+// rM has true M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
+{ .mfi
+ and rIndex_1 = 0x0f, rN
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7
+}
+{ .mfi
+ and rIndex_2_16 = 0x70, rN
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ sub rN_neg = r0, rN
+}
+;;
+
+{ .mmi
+ and rIndex_1_neg = 0x0f, rN_neg
+ add rBiased_M = rExp_bias_minus_1, rM
+ shr rM_neg = rN_neg, 0x7
+}
+{ .mmi
+ and rIndex_2_16_neg = 0x70, rN_neg
+ add rAD_T2 = rAD_TB2, rIndex_2_16
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1
+}
+;;
+
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
+
+{ .mmi
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
+}
+{ .mmi
+ add rBiased_M_neg = rExp_bias_minus_1, rM_neg
+ add rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
+ shladd rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
+}
+;;
+
+// Create Scale = 2^M
+// Load T1 and T2
+{ .mmi
+ ldfe fT1 = [rAD_T1]
+ nop.m 0
+ nop.i 0
+}
+{ .mmf
+ setf.exp f2M_neg = rBiased_M_neg
+ ldfe fT2_neg = [rAD_T2_neg]
+ fma.s1 fF_neg = fNfloat, fLn2_by_128_lo, f1 // f for the exp(-x) side: 1 + Nfloat * ln2_by_128_lo
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0
+ nop.i 0
+}
+{ .mfi
+ ldfe fT1_neg = [rAD_T1_neg]
+ fma.s1 fP54 = fR, fP5, fP4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fP54_neg = fR, fP5, fP4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 fP32_neg = fR, fP3, fP2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP5432_neg = fRsq, fP54_neg, fP32_neg
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fS1_neg = f2M_neg,fT1_neg,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS2_neg = fF_neg,fT2_neg,f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fms.s1 fP_neg = fRsq, fP5432_neg, fR
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS_neg = fS1_neg,fS2_neg,f0
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+(p14) br.cond.spnt COSH_POSSIBLE_OVERFLOW
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fExp = fS, fP, fS
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fExp_neg = fS_neg, fP_neg, fS_neg
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fExp, f1, fExp_neg // Sum of the two scaled exponentials
+ br.ret.sptk b0 // Normal path exit
+}
+;;
+
+// Here if 0 < |x| < 0.25: cosh(x) = 1 + x^2 * (A1 + A2*x^2 + ... + A6*x^10)
+COSH_SMALL:
+{ .mmf
+ add rAD_T1 = 0x1a0, rAD_TB1 // Point to A6, first entry of cosh_p_table
+ add rAD_T2 = 0x1d0, rAD_TB1 // Point to A5, fourth entry of cosh_p_table
+}
+;;
+
+{ .mmf
+ ldfe fA6 = [rAD_T1],16
+ ldfe fA5 = [rAD_T2],16
+ nop.f 0
+}
+;;
+
+{ .mmi
+ ldfe fA4 = [rAD_T1],16
+ ldfe fA3 = [rAD_T2],16
+ nop.i 0
+}
+;;
+
+{ .mmi
+ ldfe fA2 = [rAD_T1],16
+ ldfe fA1 = [rAD_T2],16
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0 // x^4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA65 = fXsq, fA6, fA5 // A6*x^2 + A5
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3 // A4*x^2 + A3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1 // A2*x^2 + A1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA6543 = fX4, fA65, fA43
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA654321 = fX4, fA6543, fA21
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 fTmp = fA6, fA6
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fA654321, fXsq, f1 // result = 1 + x^2 * poly
+ br.ret.sptk b0 // Exit if 0 < |x| < 0.25
+}
+;;
+
+
+COSH_POSSIBLE_OVERFLOW:
+
+// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
+// This cannot happen if the input is a double, only if it has higher precision.
+// Overflow is a possibility, not a certainty.
+
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
+
+{ .mfi
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set (fS/fP term only, fS_neg/fP_neg unused)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt COSH_CERTAIN_OVERFLOW // Branch if overflow
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS // NOTE(review): exp(-x) side is not added here - confirm intended
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+COSH_CERTAIN_OVERFLOW:
+{ .mmi
+ sub rTmp = rExp_mask, r0, 1 // r24 - 1: 0x1fffe on direct entry, 0x103fe via COSH_POSSIBLE_OVERFLOW
+;;
+ setf.exp fTmp = rTmp // Large power of two whose square overflows a double
+ nop.i 0
+}
+;;
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ fmerge.s FR_X = f8,f8 // Keep the argument for __libm_error_region
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 64
+ fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
+}
+;;
+
+// Here if x unorm: redo the exponent read on the normalized value, then rejoin
+COSH_UNORM:
+{ .mfb
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk COSH_COMMON
+}
+;;
+
+GLOBAL_IEEE754_END(cosh)
+libm_alias_double_other (__cosh, cosh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 value
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshf.S b/sysdeps/ia64/fpu/e_coshf.S
new file mode 100644
index 0000000000..9117ce5543
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_coshf.S
@@ -0,0 +1,711 @@
+.file "coshf.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//*********************************************************************
+// 02/02/00 Initial version
+// 02/16/00 The error tag for coshf overflow changed to 65 (from 64).
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/07/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/15/02 Improved algorithm based on expf
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//*********************************************************************
+// float coshf(float)
+//
+// Overview of operation
+//*********************************************************************
+// Case 1: 0 < |x| < 0.25
+// Evaluate cosh(x) by an 8th order polynomial
+// Care is taken for the order of multiplication; and A2 is not exactly 1/4!,
+// A3 is not exactly 1/6!, etc.
+// cosh(x) = 1 + (A1*x^2 + A2*x^4 + A3*x^6 + A4*x^8)
+//
+// Case 2: 0.25 < |x| < 89.41598
+// Algorithm is based on the identity cosh(x) = ( exp(x) + exp(-x) ) / 2.
+// The algorithm for exp is described as below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
+//
+// Take the input x. w is "how many log2/64 in x?"
+// w = x * 64/log2
+// NJ = int(w)
+// x = NJ*log2/64 + R
+
+// NJ = 64*n + j
+// x = n*log2 + (log2/64)*j + R
+//
+// So, exp(x) = 2^n * 2^(j/64)* exp(R)
+//
+// T = 2^n * 2^(j/64)
+// Construct 2^n
+// Get 2^(j/64) table
+// actually all the entries of 2^(j/64) table are stored in DP and
+// with exponent bits set to 0 -> multiplication on 2^n can be
+// performed by doing logical "or" operation with bits presenting 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
+//
+
+// The final result is reconstructed as follows
+// exp(x) = T + T*P
+
+// Special values
+//*********************************************************************
+// coshf(+0) = 1.0
+// coshf(-0) = 1.0
+
+// coshf(+qnan) = +qnan
+// coshf(-qnan) = -qnan
+// coshf(+snan) = +qnan
+// coshf(-snan) = -qnan
+
+// coshf(-inf) = +inf
+// coshf(+inf) = +inf
+
+// Overflow and Underflow
+//*********************************************************************
+// coshf(x) = largest single normal when
+// x = 89.41598 = 0x42b2d4fc
+//
+// There is no underflow.
+
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8 input, output
+// f6,f7, f9 -> f15, f32 -> f45
+
+// General registers used:
+// r2, r3, r16 -> r38
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//*********************************************************************
+// integer registers used
+// scratch
+rNJ = r2 // integer bits of fNint (n and j for x)
+rNJ_neg = r3 // 0 - rNJ (n and j for -x)
+
+rJ_neg = r16 // j for -x, then its table address and table value
+rN_neg = r17 // biased, shifted n-1 for -x
+rSignexp_x = r18 // sign and exponent of x from getf.exp
+rExp_x = r18 // exponent of x (same register as rSignexp_x)
+rExp_mask = r19 // 0x1ffff exponent mask
+rExp_bias = r20 // 0xffff exponent bias
+rAd1 = r21 // address of A4, A3 in cosh_p_table
+rAd2 = r22 // address of A2, A1 in cosh_p_table
+rJ = r23 // j, then its table address and table value
+rN = r24 // biased, shifted n-1
+rTblAddr = r25 // pointer into _coshf_table
+rA3 = r26 // SP bit pattern approximating 1/6
+rExpHalf = r27 // 0xFFFE, exponent of 1/2
+rLn2Div64 = r28 // DP bit pattern of ln(2)/64
+rGt_ln = r29 // 0x1007f, exponent used by the overflow test
+r17ones_m1 = r29 // 0x1FFFE (same register as rGt_ln)
+rRightShifter = r30 // DP bit pattern of the right shifter
+rJ_mask = r30 // 0x3f (same register as rRightShifter)
+r64DivLn2 = r31 // DP bit pattern of 64/ln(2)
+rN_mask = r31 // complement of rJ_mask (same register as r64DivLn2)
+// stacked
+GR_SAVE_PFS = r32 // saved ar.pfs in the error region
+GR_SAVE_B0 = r33 // saved b0 in the error region
+GR_SAVE_GP = r34 // saved gp in the error region
+GR_Parameter_X = r35 // address of stored Parameter 1
+GR_Parameter_Y = r36 // address of stored Parameter 2
+GR_Parameter_RESULT = r37 // address of stored Parameter 3 (result)
+GR_Parameter_TAG = r38 // error tag (65 on the overflow path)
+
+// floating point registers used
+FR_X = f10 // copy of input x for the error region (same register as fNint)
+FR_Y = f1 // stored as Parameter 2 by the error region
+FR_RESULT = f8 // result register, stored as Parameter 3
+// scratch
+fRightShifter = f6 // right shifter constant
+f64DivLn2 = f7 // 64/ln(2)
+fNormX = f9 // normalized x
+fNint = f10 // x*(64/ln(2)) + right shifter
+fN = f11 // fNint - right shifter
+fR = f12 // reduced argument R
+fLn2Div64 = f13 // ln(2)/64
+fA2 = f14 // 1/2 on the main path, table A2 on the small path
+fA3 = f15 // approx 1/6 on the main path, table A3 on the small path
+// stacked
+fP = f32 // polynomial P for x
+fT = f33 // 2^(n-1) * 2^(j/64) for x
+fMIN_SGL_OFLOW_ARG = f34 // first table word (0x42b2d4fd)
+fMAX_SGL_NORM_ARG = f35 // second table word (0x42b2d4fc)
+fRSqr = f36 // R^2
+fA1 = f37 // small path coefficient A1
+fA21 = f37 // A2*x^2 + A1 (same register as fA1)
+fA4 = f38 // small path coefficient A4
+fA43 = f38 // A4*x^2 + A3 (same register as fA4)
+fA4321 = f38 // x^4*fA43 + fA21 (same register as fA4)
+fX4 = f39 // x^4
+fTmp = f39 // scratch for flag-setting multiplies (same register as fX4)
+fGt_pln = f39 // overflow comparison threshold (same register as fX4)
+fWre_urm_f8 = f40 // trial result computed in status field 2
+fXsq = f40 // x^2 (same register as fWre_urm_f8)
+fP_neg = f41 // polynomial P for -x
+fT_neg = f42 // 2^(n-1) * 2^(j/64) for -x
+fExp = f43 // exp(x)/2
+fExp_neg = f44 // exp(-x)/2
+fAbsX = f45 // |x|
+
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(_coshf_table)
+data4 0x42b2d4fd // Smallest single arg to overflow single result
+data4 0x42b2d4fc // Largest single arg to give normal single result
+data4 0x00000000 // pad
+data4 0x00000000 // pad
+// 2^(j/64) table, j goes from 0 to 63, starts 16 bytes into the object;
+// 8-byte entries, or-ed with the shifted exponent bits by the main path
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_coshf_table)
+
+LOCAL_OBJECT_START(cosh_p_table)
+data8 0x3efa3001dcf5905b // A4, coefficient of x^8 (near 1/8!)
+data8 0x3f56c1437543543e // A3, coefficient of x^6 (near 1/6!)
+data8 0x3fa5555572601504 // A2, coefficient of x^4 (near 1/4!)
+data8 0x3fdfffffffe2f097 // A1, coefficient of x^2 (near 1/2!)
+LOCAL_OBJECT_END(cosh_p_table)
+
+// coshf entry: x arrives in f8; load constants and classify x before COSH_COMMON
+.section .text
+GLOBAL_IEEE754_ENTRY(coshf)
+
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
+}
+{ .mlx
+ addl rTblAddr = @ltoff(_coshf_table),gp // gp-relative slot holding the table address
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
+}
+;;
+
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p6, p0 = f8, 0x0b // Test for x=unorm
+ addl rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ addl rExpHalf = 0xFFFE, r0 // exponent of 1/2
+}
+;;
+
+{ .mfi
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p15, p0 = f8, 0x1e3 // test for NaT,NaN,Inf
+ nop.i 0
+}
+{ .mlx
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+}
+;;
+
+{ .mfi
+ mov rExp_mask = 0x1ffff // mask for the exponent bits of getf.exp
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ shl rA3 = rA3, 12 // 0x3E2AA000, approx to 1.0/6.0 in SP
+}
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt COSH_UNORM // Branch if x=unorm
+}
+;;
+
+COSH_COMMON:
+{ .mfi
+ setf.exp fA2 = rExpHalf // load A2 = 1/2 to FP reg
+ nop.f 0
+ mov rExp_bias = 0xffff // exponent bias
+}
+{ .mfb
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+(p15) fma.s.s0 f8 = f8, f8, f0 // result if x = NaT,NaN,Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,Inf
+}
+;;
+
+{ .mfi
+ // min overflow and max normal threshold, rTblAddr advances by 8
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
+ nop.f 0
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+}
+{ .mfb
+ setf.s fA3 = rA3 // load A3 to FP reg
+(p13) fma.s.s0 f8 = f1, f1, f0 // result if x = 0.0
+(p13) br.ret.spnt b0 // exit here if x =0.0
+}
+;;
+
+{ .mfi
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ add rTblAddr = 8, rTblAddr // now points at the 2^(j/64) table
+}
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt COSH_SMALL // Branch if 0 < |x| < 2^-2
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // check for overflow
+ fcmp.ge.s1 p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
+ mov rJ_mask = 0x3f // 6-bit mask for J
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ // branch out if overflow
+(p12) br.cond.spnt COSH_CERTAIN_OVERFLOW
+}
+;;
+
+{ .mfi
+ getf.sig rNJ = fNint // bits of n, j
+ // check for possible overflow
+ fcmp.gt.s1 p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
+ nop.i 0
+}
+;;
+
+{ .mfi
+ addl rN = 0xFFBF - 63, rNJ // biased and shifted n-1,j
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ and rJ = rJ_mask, rNJ // bits of j
+}
+{ .mfi
+ sub rNJ_neg = r0, rNJ // bits of n, j for -x
+ nop.f 0
+ andcm rN_mask = -1, rJ_mask // 0xff...fc0 to mask N
+}
+;;
+
+{ .mfi
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ nop.f 0
+ and rN = rN_mask, rN // biased, shifted n-1
+}
+{ .mfi
+ addl rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
+ nop.f 0
+ and rJ_neg = rJ_mask, rNJ_neg // bits of j for -x
+}
+;;
+
+{ .mfi
+ ld8 rJ = [rJ] // Table value
+ nop.f 0
+ shl rN = rN, 46 // 2^(n-1) bits in DP format
+}
+{ .mfi
+ shladd rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
+ nop.f 0
+ and rN_neg = rN_mask, rN_neg // biased, shifted n-1 for -x
+}
+;;
+
+{ .mfi
+ ld8 rJ_neg = [rJ_neg] // Table value for -x
+ nop.f 0
+ shl rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
+}
+;;
+
+{ .mfi
+ or rN = rN, rJ // bits of 2^(n-1) * 2^(j/64) in DP format
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mmf
+ setf.d fT = rN // 2^(n-1) * 2^(j/64)
+ or rN_neg = rN_neg, rJ_neg // -x bits of 2^(n-1) * 2^(j/64) in DP
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+}
+;;
+
+{ .mfi
+ setf.d fT_neg = rN_neg // 2^(n-1) * 2^(j/64) for -x
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fP_neg = fA3, fR, fA2 // -A3*R + A2, i.e. A3*(-R) + A2 for -x
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fms.s1 fP_neg = fP_neg, fRSqr, fR // P for -x = (A2 - A3*R)*R^2 - R
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmpy.s0 fTmp = fLn2Div64, fLn2Div64 // Force inexact
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fExp = fP, fT, fT // exp(x)/2
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s1 fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
+ // branch out if possible overflow result
+(p13) br.cond.spnt COSH_POSSIBLE_OVERFLOW
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // final result in the absence of overflow
+ fma.s.s0 f8 = fExp, f1, fExp_neg // result = (exp(x)+exp(-x))/2
+ // exit here in the absence of overflow
+ br.ret.sptk b0 // Exit main path, 0.25 <= |x| < 89.41598
+}
+;;
+
+// Here if 0 < |x| < 0.25. Evaluate 8th order polynomial in x (4 terms in x^2).
+COSH_SMALL:
+{ .mmi
+ add rAd1 = 0x200, rTblAddr // skip the 64 8-byte 2^(j/64) entries: A4, A3
+ add rAd2 = 0x210, rTblAddr // next 16 bytes: A2, A1
+ nop.i 0
+}
+;;
+
+{ .mmi
+ ldfpd fA4, fA3 = [rAd1]
+ ldfpd fA2, fA1 = [rAd2]
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0 // x^4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3 // A4*x^2 + A3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1 // A2*x^2 + A1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA4321 = fX4, fA43, fA21 // x^4*(A4*x^2 + A3) + (A2*x^2 + A1)
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 fTmp = fA4, fA4 // fA4 is the same register as fA4321 here
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fA4321, fXsq, f1 // result = 1 + x^2 * fA4321
+ br.ret.sptk b0 // Exit if 0 < |x| < 0.25
+}
+;;
+
+COSH_POSSIBLE_OVERFLOW:
+// Here if fMAX_SGL_NORM_ARG < |x| < fMIN_SGL_OFLOW_ARG (tests use fAbsX).
+// This cannot happen if input is a single, only if input higher precision.
+// Overflow is a possibility, not a certainty.
+// x may have either sign here, so the result must combine both halves,
+// fExp = exp(x)/2 and fExp_neg = exp(-x)/2, exactly as the main path does.
+// (fP*fT + fT alone is exp(x)/2, which is tiny instead of huge for x < 0.)
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
+{ .mfi
+ mov rGt_ln = 0x1007f // Exponent for largest single + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fExp, f1, fExp_neg // Result with wre set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt COSH_CERTAIN_OVERFLOW // Branch if overflow
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fExp, f1, fExp_neg // result = (exp(x)+exp(-x))/2
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+// here if overflow: build an overflowing product and hand off to the error region
+COSH_CERTAIN_OVERFLOW:
+{ .mmi
+ addl r17ones_m1 = 0x1FFFE, r0 // exponent value 0x1FFFE for fTmp
+;;
+ setf.exp fTmp = r17ones_m1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ fmerge.s FR_X = f8,f8 // keep input x before f8 is overwritten
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 65 // coshf overflow tag (see History above)
+ fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
+}
+;;
+
+// Here if x unorm: refresh the exponent from the normalized copy, then rejoin
+COSH_UNORM:
+{ .mfb
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk COSH_COMMON // Return to main path
+}
+;;
+// NOTE(review): alias macro below is given the __cosh/cosh stems - confirm it adds the float suffix
+GLOBAL_IEEE754_END(coshf)
+libm_alias_float_other (__cosh, cosh)
+
+// __libm_error_region: spill FR_Y, FR_X, FR_RESULT as singles, call __libm_error_support, reload f8
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp-32 (= new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32, pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack (sp+48)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_coshl.S b/sysdeps/ia64/fpu/e_coshl.S
new file mode 100644
index 0000000000..144ce1d6f3
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_coshl.S
@@ -0,0 +1,1095 @@
+.file "coshl.s"
+
+
+// Copyright (c) 2000 - 2002, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 01/23/01 Set inexact flag for large args.
+// 05/07/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 12/06/02 Improved performance
+//
+// API
+//==============================================================
+// long double = coshl(long double)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers:
+// r14 -> r40
+// predicate registers used:
+// p6 -> p11
+// floating-point registers used:
+// f9 -> f15; f32 -> f90;
+// f8 has input, then output
+//
+// Overview of operation
+//==============================================================
+// There are seven paths
+// 1. 0 < |x| < 0.25 COSH_BY_POLY
+// 2. 0.25 <=|x| < 32 COSH_BY_TBL
+// 3. 32 <= |x| < 11357.21655 COSH_BY_EXP (merged path with COSH_BY_TBL)
+// 4. |x| >= 11357.21655 COSH_HUGE
+// 5. x=0 Done with early exit
+// 6. x=inf,nan Done with early exit
+// 7. x=denormal COSH_DENORM
+//
+// For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
+// >= 11357.21655
+//
+//
+// 1. COSH_BY_POLY 0 < |x| < 0.25
+// ===============
+// Evaluate cosh(x) by a 12th order polynomial
+// Care is taken for the order of multiplication; and P2 is not exactly 1/4!,
+// P3 is not exactly 1/6!, etc.
+// cosh(x) = 1 + (P1*x^2 + P2*x^4 + P3*x^6 + P4*x^8 + P5*x^10 + P6*x^12)
+//
+// 2. COSH_BY_TBL 0.25 <= |x| < 32.0
+// =============
+// cosh(x) = cosh(B+R)
+// = cosh(B)cosh(R) + sinh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j/64)
+// cosh(B) = 1/2(e^B + e^-B)
+// = 1/2(2^N * 2^(j/64) + 2^-N * 2^(-j/64))
+// cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64))
+// sinh(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64))
+// 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+//
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where the p_odd uses the A coefficients and the p_even uses
+// the B coefficients
+//
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// cosh(B) = C_hi + C_lo
+// sinh(B) = S_hi
+// cosh(x) = cosh(B)cosh(R) + sinh(B)sinh(R)
+//
+// 3. COSH_BY_EXP 32.0 <= |x| < 11357.21655 ( 400c b174 ddc0 31ae c0ea )
+// ==============
+// Can approximate result by exp(x)/2 in this region.
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) + Tjlo
+// cosh(x) = Y_hi + Y_lo
+//
+// 4. COSH_HUGE |x| >= 11357.21655 ( 400c b174 ddc0 31ae c0ea )
+// ============
+// Set error tag and call error support
+//
+//
+// Assembly macros (several names below deliberately share one register)
+//==============================================================
+r_ad5 = r14 // pointer to j_lo_table midpoint
+r_rshf_2to57 = r15 // DP bit pattern of 1.1000 * 2^(63+57)
+r_exp_denorm = r15 // same register as r_rshf_2to57
+r_ad_mJ_lo = r15 // same register as r_rshf_2to57
+r_ad_J_lo = r16
+r_2Nm1 = r17
+r_2mNm1 = r18
+r_exp_x = r18 // masked exponent of x (same register as r_2mNm1)
+r_ad_J_hi = r19 // pointer to Tjhi
+r_ad2o = r19 // p_table odd coeffs pointer (same register as r_ad_J_hi)
+r_ad_mJ_hi = r20
+r_mj = r21 // -j
+r_ad2e = r22 // pointer used for overflow limit, then p_table
+r_ad3 = r23 // pointer into ab_table
+r_ad1 = r24 // pointer into cosh_arg_reduction
+r_Mmj = r24 // M-j (same register as r_ad1)
+r_rshf = r25 // DP bit pattern of 1.1000 * 2^63
+r_M = r25 // significand bits of f_M_temp (same register as r_rshf)
+r_N = r25 // same register as r_rshf
+r_jshf = r26 // j shifted left for sign extension
+r_exp_2tom57 = r26 // exponent of 2^-57 (same register as r_jshf)
+r_j = r26 // j (same register as r_jshf)
+r_exp_mask = r27 // 0x1ffff exponent mask
+r_signexp_x = r28 // sign and exponent of x
+r_signexp_0_5 = r28 // signexp of +0.5 (same register as r_signexp_x)
+r_exp_0_25 = r29 // exponent of 0.25
+r_sig_inv_ln2 = r30 // significand of 1/ln2
+r_exp_32 = r30 // exponent of 32 (same register as r_sig_inv_ln2)
+r_exp_huge = r30 // same register as r_sig_inv_ln2
+r_ad4 = r31 // pointer to j_hi_table midpoint
+
+GR_SAVE_PFS = r34
+GR_SAVE_B0 = r35
+GR_SAVE_GP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+f_ABS_X = f9 // |x|
+f_X2 = f10 // x^2
+f_X4 = f11
+f_tmp = f14
+f_RSHF = f15 // right shift constant 1.100 * 2^63
+
+f_Inv_log2by64 = f32
+f_log2by64_lo = f33
+f_log2by64_hi = f34
+f_A1 = f35
+
+f_A2 = f36
+f_A3 = f37
+f_Rcub = f38
+f_M_temp = f39
+f_R_temp = f40
+
+f_Rsq = f41
+f_R = f42
+f_M = f43
+f_B1 = f44
+f_B2 = f45
+
+f_B3 = f46
+f_peven_temp1 = f47
+f_peven_temp2 = f48
+f_peven = f49
+f_podd_temp1 = f50
+
+f_podd_temp2 = f51
+f_podd = f52
+f_poly65 = f53
+f_poly6543 = f53 // same register as f_poly65
+f_poly6to1 = f53 // same register as f_poly65
+f_poly43 = f54
+f_poly21 = f55
+
+f_X3 = f56
+f_INV_LN2_2TO63 = f57 // 1/ln2 * 2^63
+f_RSHF_2TO57 = f58 // 1.100 * 2^120
+f_2TOM57 = f59 // 2^-57
+f_smlst_oflow_input = f60 // smallest x that overflows
+
+f_pre_result = f61
+f_huge = f62
+f_spos = f63
+f_sneg = f64
+f_Tjhi = f65
+
+f_Tjlo = f66
+f_Tmjhi = f67
+f_Tmjlo = f68
+f_S_hi = f69
+f_SC_hi_temp = f70
+
+f_C_lo_temp1 = f71
+f_C_lo_temp2 = f72
+f_C_lo_temp3 = f73
+f_C_lo_temp4 = f73 // same register as f_C_lo_temp3
+f_C_lo = f74
+f_C_hi = f75
+
+f_Y_hi = f77
+f_Y_lo_temp = f78
+f_Y_lo = f79
+f_NORM_X = f80 // normalized x
+
+f_P1 = f81
+f_P2 = f82
+f_P3 = f83
+f_P4 = f84
+f_P5 = f85
+
+f_P6 = f86
+f_Tjhi_spos = f87
+f_Tjlo_spos = f88
+f_huge = f89 // NOTE(review): f_huge was already set to f62 above - confirm intended register
+f_signed_hi_lo = f90
+
+
+// Data tables
+//==============================================================
+
+// DO NOT CHANGE ORDER OF THESE TABLES
+RODATA
+
+.align 16
+LOCAL_OBJECT_START(cosh_arg_reduction)
+// data8 0xB8AA3B295C17F0BC, 0x00004005 // 64/log2 -- signif loaded with setf
+ data8 0xB17217F7D1000000, 0x00003FF8 // log2/64 high part (offset 0x00)
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // log2/64 low part (offset 0x10)
+ data8 0xb174ddc031aec0ea, 0x0000400c // Smallest x to overflow (11357.21655), offset 0x20
+LOCAL_OBJECT_END(cosh_arg_reduction)
+
+LOCAL_OBJECT_START(cosh_p_table)
+ data8 0x8FA02AC65BCBD5BC, 0x00003FE2 // P6, coefficient of x^12
+ data8 0xD00D00D1021D7370, 0x00003FEF // P4, coefficient of x^8
+ data8 0xAAAAAAAAAAAAAB80, 0x00003FFA // P2, coefficient of x^4
+ data8 0x93F27740C0C2F1CC, 0x00003FE9 // P5, coefficient of x^10 (odd-index group starts here)
+ data8 0xB60B60B60B4FE884, 0x00003FF5 // P3, coefficient of x^6
+ data8 0x8000000000000000, 0x00003FFE // P1, coefficient of x^2 (exactly 0.5)
+LOCAL_OBJECT_END(cosh_p_table)
+
+LOCAL_OBJECT_START(cosh_ab_table)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1 (near 1/3!)
+ data8 0x88888888884ECDD5, 0x00003FF8 // A2 (near 1/5!)
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3 (near 1/7!)
+ data8 0x8000000000000002, 0x00003FFE // B1 (near 1/2!)
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2 (near 1/4!)
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3 (near 1/6!)
+LOCAL_OBJECT_END(cosh_ab_table)
+
+LOCAL_OBJECT_START(cosh_j_hi_table)
+ data8 0xB504F333F9DE6484, 0x00003FFE // Tjhi for j = -32, i.e. 2^(-32/64); 16 bytes per entry
+ data8 0xB6FD91E328D17791, 0x00003FFE
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE
+ data8 0xBD08A39F580C36BF, 0x00003FFE
+ data8 0xBF1799B67A731083, 0x00003FFE
+ data8 0xC12C4CCA66709456, 0x00003FFE
+ data8 0xC346CCDA24976407, 0x00003FFE
+ data8 0xC5672A115506DADD, 0x00003FFE
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE
+ data8 0xCE248C151F8480E4, 0x00003FFE
+ data8 0xD06333DAEF2B2595, 0x00003FFE
+ data8 0xD2A81D91F12AE45A, 0x00003FFE
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE
+ data8 0xD99D15C278AFD7B6, 0x00003FFE
+ data8 0xDBFBB797DAF23755, 0x00003FFE
+ data8 0xDE60F4825E0E9124, 0x00003FFE
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE
+ data8 0xE33F8972BE8A5A51, 0x00003FFE
+ data8 0xE5B906E77C8348A8, 0x00003FFE
+ data8 0xE8396A503C4BDC68, 0x00003FFE
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE
+ data8 0xED4F301ED9942B84, 0x00003FFE
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
+ data8 0xF281773C59FFB13A, 0x00003FFE
+ data8 0xF5257D152486CC2C, 0x00003FFE
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE
+ data8 0xFA83B2DB722A033A, 0x00003FFE
+ data8 0xFD3E0C0CF486C175, 0x00003FFE
+ data8 0x8000000000000000, 0x00003FFF // Center of table (j = 0, value 1.0)
+ data8 0x8164D1F3BC030773, 0x00003FFF
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF
+ data8 0x843A28C3ACDE4046, 0x00003FFF
+ data8 0x85AAC367CC487B15, 0x00003FFF
+ data8 0x871F61969E8D1010, 0x00003FFF
+ data8 0x88980E8092DA8527, 0x00003FFF
+ data8 0x8A14D575496EFD9A, 0x00003FFF
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
+ data8 0x8EA4398B45CD53C0, 0x00003FFF
+ data8 0x9031DC431466B1DC, 0x00003FFF
+ data8 0x91C3D373AB11C336, 0x00003FFF
+ data8 0x935A2B2F13E6E92C, 0x00003FFF
+ data8 0x94F4EFA8FEF70961, 0x00003FFF
+ data8 0x96942D3720185A00, 0x00003FFF
+ data8 0x9837F0518DB8A96F, 0x00003FFF
+ data8 0x99E0459320B7FA65, 0x00003FFF
+ data8 0x9B8D39B9D54E5539, 0x00003FFF
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF
+ data8 0x9EF5326091A111AE, 0x00003FFF
+ data8 0xA0B0510FB9714FC2, 0x00003FFF
+ data8 0xA27043030C496819, 0x00003FFF
+ data8 0xA43515AE09E6809E, 0x00003FFF
+ data8 0xA5FED6A9B15138EA, 0x00003FFF
+ data8 0xA7CD93B4E965356A, 0x00003FFF
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
+ data8 0xAB7A39B5A93ED337, 0x00003FFF
+ data8 0xAD583EEA42A14AC6, 0x00003FFF
+ data8 0xAF3B78AD690A4375, 0x00003FFF
+ data8 0xB123F581D2AC2590, 0x00003FFF
+ data8 0xB311C412A9112489, 0x00003FFF
+ data8 0xB504F333F9DE6484, 0x00003FFF // Tjhi for j = +32, i.e. 2^(32/64)
+LOCAL_OBJECT_END(cosh_j_hi_table)
+
+LOCAL_OBJECT_START(cosh_j_lo_table)
+ data4 0x1EB2FB13 // Tjlo for j = -32; 4 bytes per entry, same order as cosh_j_hi_table
+ data4 0x1CE2CBE2
+ data4 0x1DDC3CBC
+ data4 0x1EE9AA34
+ data4 0x9EAEFDC1
+ data4 0x9DBF517B
+ data4 0x1EF88AFB
+ data4 0x1E03B216
+ data4 0x1E78AB43
+ data4 0x9E7B1747
+ data4 0x9EFE3C0E
+ data4 0x9D36F837
+ data4 0x9DEE53E4
+ data4 0x9E24AE8E
+ data4 0x1D912473
+ data4 0x1EB243BE
+ data4 0x1E669A2F
+ data4 0x9BBC610A
+ data4 0x1E761035
+ data4 0x9E0BE175
+ data4 0x1CCB12A1
+ data4 0x1D1BFE90
+ data4 0x1DF2F47A
+ data4 0x1EF22F22
+ data4 0x9E3F4A29
+ data4 0x1EC01A5B
+ data4 0x1E8CAC3A
+ data4 0x9DBB3FAB
+ data4 0x1EF73A19
+ data4 0x9BB795B5
+ data4 0x1EF84B76
+ data4 0x9EF5818B
+ data4 0x00000000 // Center of table (j = 0)
+ data4 0x1F77CACA
+ data4 0x1EF8A91D
+ data4 0x1E57C976
+ data4 0x9EE8DA92
+ data4 0x1EE85C9F
+ data4 0x1F3BF1AF
+ data4 0x1D80CA1E
+ data4 0x9D0373AF
+ data4 0x9F167097
+ data4 0x1EB70051
+ data4 0x1F6EB029
+ data4 0x1DFD6D8E
+ data4 0x9EB319B0
+ data4 0x1EBA2BEB
+ data4 0x1F11D537
+ data4 0x1F0D5A46
+ data4 0x9E5E7BCA
+ data4 0x9F3AAFD1
+ data4 0x9E86DACC
+ data4 0x9F3EDDC2
+ data4 0x1E496E3D
+ data4 0x9F490BF6
+ data4 0x1DD1DB48
+ data4 0x1E65EBFB
+ data4 0x9F427496
+ data4 0x1F283C4A
+ data4 0x1F4B0047
+ data4 0x1F130152
+ data4 0x9E8367C0
+ data4 0x9F705F90
+ data4 0x1EFB3C53
+ data4 0x1F32FB13 // Tjlo for j = +32
+LOCAL_OBJECT_END(cosh_j_lo_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(coshl)
+
+{ .mlx
+ getf.exp r_signexp_x = f8 // Get signexp of x, must redo if unorm
+ movl r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ addl r_ad1 = @ltoff(cosh_arg_reduction), gp
+ movl r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
+}
+;;
+
+{ .mfi
+ ld8 r_ad1 = [r_ad1]
+ fmerge.s f_ABS_X = f0,f8
+ mov r_exp_0_25 = 0x0fffd // Form exponent for 0.25
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 f_NORM_X = f8
+ mov r_exp_2tom57 = 0xffff-57
+}
+;;
+
+{ .mfi
+ setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
+ fclass.m p10,p0 = f8, 0x0b // Test for denorm
+ mov r_exp_mask = 0x1ffff
+}
+{ .mlx
+ setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
+ movl r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p0 = f8, 0x07 // Test if x=0
+ nop.i 0
+}
+{ .mfi
+ setf.exp f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
+ nop.f 0
+ add r_ad3 = 0x90, r_ad1 // Point to ab_table
+}
+;;
+
+{ .mfi
+ setf.d f_RSHF = r_rshf // Form right shift const 1.100 * 2^63
+ fclass.m p6,p0 = f8, 0xe3 // Test if x nan, inf
+ add r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
+}
+{ .mib
+ add r_ad2e = 0x20, r_ad1 // Point to p_table
+ nop.i 0
+(p10) br.cond.spnt COSH_DENORM // Branch if x denorm
+}
+;;
+
+// Common path -- return here from COSH_DENORM if x is unnorm
+COSH_COMMON:
+{ .mfi
+ ldfe f_smlst_oflow_input = [r_ad2e],16
+(p7) fma.s0 f8 = f1, f1, f0 // Result = 1.0 if x=0
+ add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
+}
+{ .mib
+ ldfe f_log2by64_hi = [r_ad1],16
+ and r_exp_x = r_exp_mask, r_signexp_x
+(p7) br.ret.spnt b0 // Exit if x=0
+}
+;;
+
+// Get the A coefficients for COSH_BY_TBL
+{ .mfi
+ ldfe f_A1 = [r_ad3],16
+ fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
+ cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test x < 0.25
+}
+{ .mfb
+ add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
+(p6) fma.s0 f8 = f8,f8,f0 // Result for x nan, inf
+(p6) br.ret.spnt b0 // Exit for x nan, inf
+}
+;;
+
+// Calculate X2 = ax*ax for COSH_BY_POLY
+{ .mfi
+ ldfe f_log2by64_lo = [r_ad1],16
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ ldfe f_A2 = [r_ad3],16
+ fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
+(p7) br.cond.spnt COSH_BY_POLY
+}
+;;
+
+// Here if |x| >= 0.25
+COSH_BY_TBL:
+// ******************************************************
+// STEP 1 (TBL and EXP) - Argument reduction
+// ******************************************************
+// Get the following constants.
+// Inv_log2by64
+// log2by64_hi
+// log2by64_lo
+
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
+// 0xffff + (N-1) = 0xffff +N -1
+// 0xffff - (N +1) = 0xffff -N -1
+
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// f_M = M = truncate(ax/(log2/64))
+// Put the integer representation of M in r_M
+// and the floating point representation of M in f_M
+
+// Get the remaining A,B coefficients
+{ .mmi
+ ldfe f_A3 = [r_ad3],16
+ nop.m 0
+ nop.i 0
+}
+;;
+
+// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
+// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
+{ .mfi
+ nop.m 0
+ fma.s1 f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
+ mov r_signexp_0_5 = 0x0fffe // signexp of +0.5
+}
+;;
+
+// Test for |x| >= overflow limit
+{ .mfi
+ ldfe f_B1 = [r_ad3],16
+ fcmp.ge.s1 p6,p0 = f_ABS_X, f_smlst_oflow_input
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe f_B2 = [r_ad3],16
+ nop.f 0
+ mov r_exp_32 = 0x10004
+}
+;;
+
+// Subtract RSHF constant to get rounded M as a floating point value
+// M_temp * 2^-(63-6) - 1.1000 * 2^63
+{ .mfb
+ ldfe f_B3 = [r_ad3],16
+ fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
+(p6) br.cond.spnt COSH_HUGE // Branch if result will overflow
+}
+;;
+
+{ .mfi
+ getf.sig r_M = f_M_temp
+ nop.f 0
+ cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if x >= 32
+}
+;;
+
+// Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+
+// Calculate R
+// ax - M*log2by64_hi
+// R = (ax - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 0
+ fnma.s1 f_R_temp = f_M, f_log2by64_hi, f_ABS_X
+ and r_j = 0x3f, r_M
+}
+;;
+
+{ .mii
+ nop.m 0
+ shl r_jshf = r_j, 0x2 // Shift j so can sign extend it
+;;
+ sxt1 r_jshf = r_jshf
+}
+;;
+
+{ .mii
+ nop.m 0
+ shr r_j = r_jshf, 0x2 // Now j has range -32 to 31
+ nop.i 0
+}
+;;
+
+{ .mmi
+ shladd r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
+ sub r_Mmj = r_M, r_j // M-j
+ sub r_mj = r0, r_j // Form -j
+}
+;;
+
+// The TBL and EXP branches are merged and predicated
+// If TBL, p6 true, 0.25 <= |x| < 32
+// If EXP, p7 true, 32 <= |x| < overflow_limit
+//
+// N = (M-j)/64
+{ .mfi
+ ldfe f_Tjhi = [r_ad_J_hi]
+ fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp
+ shr r_N = r_Mmj, 0x6 // N = (M-j)/64
+}
+{ .mfi
+ shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
+ nop.f 0
+ shladd r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
+}
+;;
+
+{ .mfi
+ sub r_2mNm1 = r_signexp_0_5, r_N // signexp 2^(-N-1)
+ nop.f 0
+ shladd r_ad_J_lo = r_j, 2, r_ad5 // pointer to Tjlo
+}
+{ .mfi
+ ldfe f_Tmjhi = [r_ad_mJ_hi]
+ nop.f 0
+ add r_2Nm1 = r_signexp_0_5, r_N // signexp 2^(N-1)
+}
+;;
+
+{ .mmf
+ ldfs f_Tmjlo = [r_ad_mJ_lo]
+ setf.exp f_sneg = r_2mNm1 // Form 2^(-N-1)
+ nop.f 0
+}
+;;
+
+{ .mmf
+ ldfs f_Tjlo = [r_ad_J_lo]
+ setf.exp f_spos = r_2Nm1 // Form 2^(N-1)
+ nop.f 0
+}
+;;
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+
+{ .mmf
+ nop.m 0
+ nop.m 0
+ fma.s1 f_Rsq = f_R, f_R, f0
+}
+;;
+
+
+// Calculate p_even
+// B_2 + Rsq *B_3
+// B_1 + Rsq * (B_2 + Rsq *B_3)
+// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+{ .mfi
+ nop.m 0
+ fma.s1 f_peven_temp1 = f_Rsq, f_B3, f_B2
+ nop.i 0
+}
+// Calculate p_odd
+// A_2 + Rsq *A_3
+// A_1 + Rsq * (A_2 + Rsq *A_3)
+// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+{ .mfi
+ nop.m 0
+ fma.s1 f_podd_temp1 = f_Rsq, f_A3, f_A2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_Rcub = f_Rsq, f_R, f0
+ nop.i 0
+}
+;;
+
+//
+// If TBL,
+// Calculate S_hi and S_lo, and C_hi
+// SC_hi_temp = sneg * Tmjhi
+// S_hi = spos * Tjhi - SC_hi_temp
+// S_hi = spos * Tjhi - (sneg * Tmjhi)
+// C_hi = spos * Tjhi + SC_hi_temp
+// C_hi = spos * Tjhi + (sneg * Tmjhi)
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
+ nop.i 0
+}
+;;
+
+// If TBL,
+// C_lo_temp3 = sneg * Tmjlo
+// C_lo_temp4 = spos * Tjlo + C_lo_temp3
+// C_lo_temp4 = spos * Tjlo + (sneg * Tmjlo)
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_lo_temp3 = f_sneg, f_Tmjlo, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
+ nop.i 0
+}
+;;
+
+// If EXP,
+// Compute 2^(N-1) * Tjhi and 2^(N-1) * Tjlo
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Tjhi_spos = f_Tjhi, f_spos, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Tjlo_spos = f_Tjlo, f_spos, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fms.s1 f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_lo_temp4 = f_spos, f_Tjlo, f_C_lo_temp3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_peven = f_Rsq, f_peven_temp2, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_podd = f_podd_temp2, f_Rcub, f_R
+ nop.i 0
+}
+;;
+
+// If TBL,
+// C_lo_temp1 = spos * Tjhi - C_hi
+// C_lo_temp2 = sneg * Tmjlo + C_lo_temp1
+// C_lo_temp2 = sneg * Tmjlo + (spos * Tjhi - C_hi)
+
+{ .mfi
+ nop.m 0
+(p6) fms.s1 f_C_lo_temp1 = f_spos, f_Tjhi, f_C_hi
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_lo_temp2 = f_sneg, f_Tmjhi, f_C_lo_temp1
+ nop.i 0
+}
+;;
+
+// If EXP,
+// Y_hi = 2^(N-1) * Tjhi
+// Y_lo = 2^(N-1) * Tjhi * (p_odd + p_even) + 2^(N-1) * Tjlo
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Y_lo_temp = f_peven, f1, f_podd
+ nop.i 0
+}
+;;
+
+// If TBL,
+// C_lo = C_lo_temp4 + C_lo_temp2
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_lo = f_C_lo_temp4, f1, f_C_lo_temp2
+ nop.i 0
+}
+;;
+
+// If TBL,
+// Y_hi = C_hi
+// Y_lo = S_hi*p_odd + (C_hi*p_even + C_lo)
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_Y_lo_temp = f_C_hi, f_peven, f_C_lo
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 f_tmp = f_B2, f_B2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_Y_lo = f_S_hi, f_podd, f_Y_lo_temp
+ nop.i 0
+}
+;;
+
+// f8 = answer = Y_hi + Y_lo
+{ .mfi
+ nop.m 0
+(p7) fma.s0 f8 = f_Y_lo, f1, f_Tjhi_spos
+ nop.i 0
+}
+;;
+
+// f8 = answer = Y_hi + Y_lo
+{ .mfb
+ nop.m 0
+(p6) fma.s0 f8 = f_Y_lo, f1, f_C_hi
+ br.ret.sptk b0 // Exit for COSH_BY_TBL and COSH_BY_EXP
+}
+;;
+
+
+// Here if 0 < |x| < 0.25
+COSH_BY_POLY:
+{ .mmf
+ ldfe f_P6 = [r_ad2e],16
+ ldfe f_P5 = [r_ad2o],16
+ nop.f 0
+}
+;;
+
+{ .mmi
+ ldfe f_P4 = [r_ad2e],16
+ ldfe f_P3 = [r_ad2o],16
+ nop.i 0
+}
+;;
+
+{ .mmi
+ ldfe f_P2 = [r_ad2e],16
+ ldfe f_P1 = [r_ad2o],16
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_X3 = f_NORM_X, f_X2, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_X4 = f_X2, f_X2, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly65 = f_X2, f_P6, f_P5
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly43 = f_X2, f_P4, f_P3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly21 = f_X2, f_P2, f_P1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly6543 = f_X4, f_poly65, f_poly43
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly6to1 = f_X4, f_poly6543, f_poly21
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 f_tmp = f_P6, f_P6
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s0 f8 = f_poly6to1, f_X2, f1
+ br.ret.sptk b0 // Exit COSH_BY_POLY
+}
+;;
+
+
+// Here if x denorm or unorm
+COSH_DENORM:
+// Determine if x really a denorm and not a unorm
+{ .mmf
+ getf.exp r_signexp_x = f_NORM_X
+ mov r_exp_denorm = 0x0c001 // Real denorms have exp < this
+ fmerge.s f_ABS_X = f0, f_NORM_X
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p10,p0 = f8, f0 // Set denorm flag
+ nop.i 0
+}
+;;
+
+// Set p8 if really a denorm
+{ .mmi
+ and r_exp_x = r_exp_mask, r_signexp_x
+;;
+ cmp.lt p8,p9 = r_exp_x, r_exp_denorm
+ nop.i 0
+}
+;;
+
+// Identify denormal operands.
+{ .mfb
+ nop.m 0
+(p8) fma.s0 f8 = f8,f8,f1 // If x denorm, result=1+x^2
+(p9) br.cond.sptk COSH_COMMON // Return to main path if x unorm
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+ br.ret.sptk b0 // Exit if x denorm
+}
+;;
+
+
+// Here if |x| >= overflow limit
+COSH_HUGE:
+// for COSH_HUGE, put 24000 in exponent; take sign from input
+{ .mmi
+ mov r_exp_huge = 0x15dbf
+;;
+ setf.exp f_huge = r_exp_huge
+ nop.i 0
+}
+;;
+
+{ .mfi
+ alloc r32 = ar.pfs,0,5,4,0
+ fma.s1 f_signed_hi_lo = f_huge, f1, f1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s0 f_pre_result = f_signed_hi_lo, f_huge, f0
+ mov GR_Parameter_TAG = 63
+}
+;;
+
+GLOBAL_IEEE754_END(coshl)
+libm_alias_ldouble_other (__cosh, cosh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// Build a 64-byte frame, store three values on it, call __libm_error_support
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32 = new sp+32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (f0) on stack, advance to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_exp.S b/sysdeps/ia64/fpu/e_exp.S
new file mode 100644
index 0000000000..b2d8f7de49
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_exp.S
@@ -0,0 +1,800 @@
+.file "exp.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 2/02/00 Initial version
+// 3/07/00 exp(inf) = inf but now does NOT call error support
+// exp(-inf) = 0 but now does NOT call error support
+// 4/04/00 Unwind support added
+// 8/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/30/00 Reworked to shorten main path, widen main path to include all
+// args in normal range, and add quick exit for 0, nan, inf.
+// 12/05/00 Loaded constants earlier with setf to save 2 cycles.
+// 02/05/02 Corrected uninitialized predicate in POSSIBLE_UNDERFLOW path
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 09/07/02 Force inexact flag
+// 11/15/02 Split underflow path into zero/nonzero; eliminated fma in main path
+// 05/30/03 Set inexact flag on unmasked overflow/underflow
+// 03/31/05 Reformatted delimiters between data tables
+
+// API
+//==============================================================
+// double exp(double)
+
+// Overview of operation
+//==============================================================
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
+
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by 5th order polynomial
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
+
+
+// Special values
+//==============================================================
+// exp(+0) = 1.0
+// exp(-0) = 1.0
+
+// exp(+qnan) = +qnan
+// exp(-qnan) = -qnan
+// exp(+snan) = +qnan
+// exp(-snan) = -qnan
+
+// exp(-inf) = +0
+// exp(+inf) = +inf
+
+// Overflow and Underflow
+//=======================
+// exp(x) = largest double normal when
+// x = 709.7827 = 0x40862e42fefa39ef
+
+// exp(x) = smallest double normal when
+// x = -708.396 = 0xc086232bdd7abcd2
+
+// exp(x) = largest round-to-nearest single zero when
+// x = -745.1332 = 0xc0874910d52d3052
+
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6 -> f15, f32 -> f49
+
+// General registers used:
+// r14 -> r40
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//==============================================================
+
+rRshf = r14 // bit pattern of double 1.1000 * 2^63
+rAD_TB1 = r15 // walks exp_table_1, ends at start of table 1
+rAD_T1 = r15 // address of selected table 1 entry (reuses r15)
+rAD_TB2 = r16 // start of table 2
+rAD_T2 = r16 // address of selected table 2 entry (reuses r16)
+rAD_P = r17 // address of polynomial coefficients
+rN = r18 // N taken from significand of fW_2TO56_RSH
+rIndex_1 = r19 // N & 0x0f
+rIndex_2_16 = r20 // N & 0x70
+rM = r21 // N >> 7
+rBiased_M = r21 // rM + rExp_bias (reuses r21)
+rIndex_1_16 = r21 // not used by any instruction in this file
+rSig_inv_ln2 = r22 // significand of 1/ln2
+rExp_bias = r23 // 0xffff
+rExp_mask = r24 // 0x1ffff
+rTmp = r25 // scratch in the overflow/underflow paths
+rRshf_2to56 = r26 // bit pattern of double 1.1000 * 2^(63+56)
+rGt_ln = r27 // exponent used to build fGt_pln
+rExp_2tom56 = r28 // biased exponent of 2^-56
+
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36 // not used by any instruction in this file
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10 // copy of the argument, stored as Parameter 1
+FR_Y = f1 // stored as Parameter 2
+FR_RESULT = f8 // stored as Parameter 3
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP54 = f12 // reuses f12
+fP5432 = f12 // reuses f12
+fP4 = f13
+fP3 = f14
+fP32 = f14 // reuses f14
+fP2 = f15
+fP = f15 // reuses f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fNormX = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+f2M = f41
+fS1 = f42
+fT1 = f42 // same register as fS1
+fS2 = f43
+fT2 = f43 // same register as fS2
+fS = f43 // same register as fS2
+fWre_urm_f8 = f44 // overflow test result (POSSIBLE_OVERFLOW path)
+fFtz_urm_f8 = f44 // underflow test result (POSSIBLE_UNDERFLOW path)
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_ZERO_ARG = f46
+fMAX_DBL_NORM_ARG = f47
+fMIN_DBL_NORM_ARG = f48
+fGt_pln = f49
+fTmp = f49 // same register as fGt_pln
+
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 16
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7), held pre-scaled by 2^63 as 1.1000..00 * 2^(63+56)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is fNfloat.
+
+
+LOCAL_OBJECT_START(exp_table_1)
+data8 0x40862e42fefa39f0 // smallest dbl overflow arg, +709.7827
+data8 0xc0874910d52d3052 // largest arg for rnd-to-nearest 0 result, -745.133
+data8 0x40862e42fefa39ef // largest dbl arg to give normal dbl result, +709.7827
+data8 0xc086232bdd7abcd2 // smallest dbl arg to give normal dbl result, -708.396
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+//
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+// (16 entries of 16 bytes = 0x100 bytes, the offset the code adds to reach table 2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7 (8 entries of 16 bytes = 0x80 bytes, then the P table)
+LOCAL_OBJECT_START(exp_table_2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_2)
+
+// Polynomial coefficients stored as doubles; the code loads them in pairs with ldfpd
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5 (about 1/120)
+data8 0x3fa55555d787761c //P4 (about 1/24)
+data8 0x3fc5555555555414 //P3 (about 1/6)
+data8 0x3fdffffffffffd6a //P2 (about 1/2)
+LOCAL_OBJECT_END(exp_p_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(exp)
+
+{ .mlx
+ nop.m 0
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ addl rAD_TB1 = @ltoff(exp_table_1), gp // Linkage table entry for exp_table_1
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
+}
+;;
+
+{ .mfi
+ ld8 rAD_TB1 = [rAD_TB1] // Load address of exp_table_1
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ mov rExp_mask = 0x1ffff
+}
+{ .mfi
+ mov rExp_bias = 0xffff
+ fnorm.s1 fNormX = f8 // Normalized copy of x used by the main path
+ mov rExp_2tom56 = 0xffff-56 // Biased exponent of 2^-56
+}
+;;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
+
+{ .mfi
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p9,p0 = f8,0x22 // Test for x=-inf
+ nop.i 0
+}
+{ .mlx
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_ZERO_ARG = [rAD_TB1],16 // First two table doubles
+ fclass.m p10,p0 = f8,0x1e1 // Test for x=+inf, nan, NaT
+ nop.i 0
+}
+{ .mfb
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+(p9) fma.d.s0 f8 = f0,f0,f0 // quick exit for x=-inf
+(p9) br.ret.spnt b0
+}
+;;
+
+{ .mfi
+ ldfpd fMAX_DBL_NORM_ARG, fMIN_DBL_NORM_ARG = [rAD_TB1],16 // Next two table doubles
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+(p8) fma.d.s0 f8 = f1,f1,f0 // quick exit for x=0
+(p8) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ ldfe fLn2_by_128_hi = [rAD_TB1],16
+(p10) fma.d.s0 f8 = f8,f8,f0 // Result if x=+inf, nan, NaT
+(p10) br.ret.spnt b0 // quick exit for x=+inf, nan, NaT
+}
+;;
+
+{ .mfi
+ ldfe fLn2_by_128_lo = [rAD_TB1],16
+ fcmp.eq.s0 p6,p0 = f8, f0 // Dummy to set D
+ nop.i 0
+}
+;;
+
+// After that last load, rAD_TB1 points to the beginning of table 1
+
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
+
+{ .mfi
+ nop.m 0
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ nop.i 0
+}
+;;
+
+// Divide arguments into the following categories:
+// Certain Underflow p11 - -inf < x <= MAX_DBL_ZERO_ARG
+// Possible Underflow p13 - MAX_DBL_ZERO_ARG < x < MIN_DBL_NORM_ARG
+// Certain Safe - MIN_DBL_NORM_ARG <= x <= MAX_DBL_NORM_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < x < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= x < +inf
+//
+// If the input is really a double arg, then there will never be
+// "Possible Overflow" arguments.
+//
+
+{ .mfi
+ add rAD_TB2 = 0x100, rAD_TB1 // Table 2 starts 0x100 bytes after table 1
+ fcmp.ge.s1 p15,p0 = fNormX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
+}
+;;
+
+{ .mfi
+ add rAD_P = 0x80, rAD_TB2 // P table starts 0x80 bytes after table 2
+ fcmp.le.s1 p11,p0 = fNormX,fMAX_DBL_ZERO_ARG
+ nop.i 0
+}
+;;
+
+{ .mfb
+ ldfpd fP5, fP4 = [rAD_P] ,16
+ fcmp.gt.s1 p14,p0 = fNormX,fMAX_DBL_NORM_ARG
+(p15) br.cond.spnt EXP_CERTAIN_OVERFLOW
+}
+;;
+
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into rN.
+
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
+
+{ .mfb
+ ldfpd fP3, fP2 = [rAD_P]
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF
+(p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW
+}
+;;
+
+{ .mfi
+ getf.sig rN = fW_2TO56_RSH
+ nop.f 0
+ nop.i 0
+}
+;;
+
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has M + exponent bias (0xffff)
+// rIndex_1_16 has index_1 * 16
+
+// rM has true M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
+{ .mfi
+ and rIndex_1 = 0x0f, rN // index_1 = low 4 bits of N
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7 // M = N >> 7 (signed shift)
+}
+{ .mfi
+ and rIndex_2_16 = 0x70, rN // index_2 * 16 = bits 4-6 of N
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ nop.i 0
+}
+;;
+
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
+
+{ .mmi
+ add rBiased_M = rExp_bias, rM
+ add rAD_T2 = rAD_TB2, rIndex_2_16
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1 // table 1 base + index_1 * 16
+}
+;;
+
+// Create Scale = 2^M
+{ .mmi
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
+}
+;;
+
+// Load T1 and T2
+{ .mfi
+ ldfe fT1 = [rAD_T1]
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0 // r^2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP54 = fR, fP5, fP4 // P5*r + P4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.lt.s1 p13,p0 = fNormX,fMIN_DBL_NORM_ARG
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2 // P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32 // r^2 * P54 + P32
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0 // 2^M * T1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0 // f * T2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR // r + r^2 * P5432
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0 // S = S1 * S2
+ nop.i 0
+}
+;;
+
+{ .mbb
+ nop.m 0
+(p13) br.cond.spnt EXP_POSSIBLE_UNDERFLOW
+(p14) br.cond.spnt EXP_POSSIBLE_OVERFLOW
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS // Result = S + S*P, rounded to double
+ br.ret.sptk b0 // Normal path exit
+}
+;;
+
+
+EXP_POSSIBLE_OVERFLOW:
+
+// Here if fMAX_DBL_NORM_ARG < x < fMIN_DBL_OFLOW_ARG
+// This cannot happen if the input is a double, only if it has higher precision.
+// Overflow is a possibility, not a certainty.
+
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
+
+{ .mfi
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt EXP_CERTAIN_OVERFLOW // Branch if overflow
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS // Same S + S*P as the normal path, in sf0
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+EXP_CERTAIN_OVERFLOW:
+{ .mmi
+ sub rTmp = rExp_mask, r0, 1 // rTmp = 0x1ffff - 0 - 1 = 0x1fffe
+;;
+ setf.exp fTmp = rTmp // Huge finite value with exponent 0x1fffe
+ nop.i 0
+}
+;;
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0 // 1 input, 4 locals (r33-r36), 4 outputs (r37-r40)
+ fmerge.s FR_X = f8,f8 // Copy x for the error handler
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 14 // Tag passed to __libm_error_support on overflow
+ fma.d.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
+}
+;;
+
+EXP_POSSIBLE_UNDERFLOW:
+
+// Here if fMAX_DBL_ZERO_ARG < x < fMIN_DBL_NORM_ARG
+// Underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x41 // Get user's round mode, set ftz
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.d.s2 fFtz_urm_f8 = fS, fP, fS // Result with ftz set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off ftz in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s1 p6, p7 = fFtz_urm_f8, f0 // Test for underflow: p6 if ftz result is 0, else p7
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS // Compute result, set I, maybe U
+ nop.i 0
+}
+;;
+
+{ .mbb
+ nop.m 0
+(p6) br.cond.spnt EXP_UNDERFLOW_COMMON // Branch if really underflow
+(p7) br.ret.sptk b0 // Exit if really no underflow
+}
+;;
+
+EXP_CERTAIN_UNDERFLOW:
+// Here if x <= fMAX_DBL_ZERO_ARG
+// Result will be zero (or smallest denorm if round to +inf) with I, U set
+{ .mmi
+ mov rTmp = 1 // Biased exponent 1
+;;
+ setf.exp fTmp = rTmp // Form small normal
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.se fTmp = fTmp, fLn2_by_128_lo // Small with signif lsb 1
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
+ br.cond.sptk EXP_UNDERFLOW_COMMON
+}
+;;
+
+EXP_UNDERFLOW_COMMON:
+// Determine if underflow result is zero or nonzero
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0 // 1 input, 4 locals (r33-r36), 4 outputs (r37-r40)
+ fcmp.eq.s1 p6, p0 = f8, f0 // p6 set if the result is zero
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fmerge.s FR_X = fNormX,fNormX // Copy normalized x for the error handler
+(p6) br.cond.spnt EXP_UNDERFLOW_ZERO
+}
+;;
+
+EXP_UNDERFLOW_NONZERO:
+// Here if x < fMIN_DBL_NORM_ARG and result nonzero;
+// I, U are set
+{ .mfb
+ mov GR_Parameter_TAG = 15 // Tag passed to __libm_error_support on underflow
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
+}
+;;
+
+EXP_UNDERFLOW_ZERO:
+// Here if x < fMIN_DBL_NORM_ARG and result zero;
+// I, U are set
+{ .mfb
+ mov GR_Parameter_TAG = 15 // Same tag as the nonzero case
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
+}
+;;
+
+GLOBAL_IEEE754_END(exp)
+libm_hidden_def (__exp)
+libm_alias_double_other (__exp, exp)
+#ifdef SHARED
+.symver exp,exp@@GLIBC_2.29 // exp is the default version, GLIBC_2.29
+.weak __exp_compat // Weak symbol for the old version binding
+.set __exp_compat,__exp // Same address as __exp
+.symver __exp_compat,exp@GLIBC_2.2 // Non-default version exp@GLIBC_2.2
+#endif
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32 = new sp+32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack, advance to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Set Parameter 3 address again (see 8/15/00 history note)
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_exp10.S b/sysdeps/ia64/fpu/e_exp10.S
new file mode 100644
index 0000000000..5edd877db9
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_exp10.S
@@ -0,0 +1,609 @@
+.file "exp10.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 08/25/00 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 09/06/02 Improved performance; no inexact flags on exact cases
+// 01/29/03 Added missing } to bundle templates
+// 12/16/04 Call error handling on underflow.
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//==============================================================
+// double exp10(double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// Let x= (K + fh + fl + r)/log2(10), where
+// K is an integer, fh= 0.b1 b2 b3 b4 b5,
+// fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0),
+// and |r|<2^{-11}
+// Th is a table that stores 2^fh (32 entries) rounded to
+// double extended precision (only mantissa is stored)
+// Tl is a table that stores 2^fl (32 entries) rounded to
+// double extended precision (only mantissa is stored)
+//
+// 10^x is approximated as
+// 2^K * Th [ fh ] * Tl [ fl ] * (1+c1*e+c1*r+c2*r^2+c3*r^3+c4*r^4),
+// where e= (x*log2(10)_hi-RN(x*log2(10)_hi))+log2(10)_lo*x
+
+// Note there are only 22 non-zero values that produce an exact result:
+// 1.0, 2.0, ... 22.0.
+// We test for these cases and use s1 to avoid setting the inexact flag.
+
+// Special values
+//==============================================================
+// exp10(0)= 1
+// exp10(+inf)= inf
+// exp10(-inf)= 0
+//
+
+// Registers used
+//==============================================================
+// r2-r3, r14-r40
+// f6-f15, f32-f52
+// p6-p12
+//
+
+#include
+
+
+GR_TBL_START = r2 // address of the @ltoff(poly_coeffs) slot
+GR_LOG_TBL = r3 // pointer to the high (2^fh) half of T_table
+
+GR_OF_LIMIT = r14
+GR_UF_LIMIT = r15
+GR_EXP_CORR = r16
+GR_F_low = r17
+GR_F_high = r18
+GR_K = r19
+GR_Flow_ADDR = r20
+
+GR_BIAS = r21
+GR_Fh = r22
+GR_Fh_ADDR = r23
+GR_EXPMAX = r24
+GR_BIAS53 = r25
+
+GR_ROUNDVAL = r26
+GR_SNORM_LIMIT = r26 // same register as GR_ROUNDVAL; loaded after GR_ROUNDVAL is consumed
+GR_MASK = r27
+GR_KF0 = r28
+GR_MASK_low = r29
+GR_COEFF_START = r30
+GR_exact_limit = r31
+
+GR_SAVE_B0 = r33 // r33-r36: the 4 locals of "alloc ..., 1, 4, 4, 0"
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37 // r37-r40: the 4 output registers of the alloc
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10 // NOTE(review): f10 is also FR_LOG2_10; confirm it holds x at error time
+FR_Y = f1 // f1, i.e. the constant 1.0
+FR_RESULT = f8
+
+
+FR_COEFF1 = f6
+FR_COEFF2 = f7
+FR_R = f9
+FR_LOG2_10 = f10 // same register as FR_X
+
+FR_2P53 = f11
+FR_KF0 = f12
+FR_COEFF3 = f13
+FR_COEFF4 = f14
+FR_UF_LIMIT = f15
+
+FR_OF_LIMIT = f32
+FR_DX_L210 = f33
+FR_ROUNDVAL = f34
+FR_KF = f35
+
+FR_2_TO_K = f36
+FR_T_low = f37
+FR_T_high = f38
+FR_P34 = f39
+FR_R2 = f40
+
+FR_P12 = f41
+FR_T_low_K = f42
+FR_P14 = f43
+FR_T = f44
+FR_P = f45
+
+FR_L2_10_low = f46
+FR_L2_10_high = f47
+FR_E0 = f48
+FR_E = f49
+FR_exact_limit = f50
+
+FR_int_x = f51
+FR_SNORM_LIMIT = f52
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// Five 16-byte entries, read sequentially by exp10 with post-increment 16.
+data8 0xd49a784bcd1b8afe, 0x00003fcb // log2(10)*2^(10-63) (ldfe)
+data8 0x9257edfe9b5fb698, 0x3fbf // log2(10)_low (bits 64...127) (ldfe)
+data8 0x3fac6b08d704a0c0, 0x3f83b2ab6fba4e77 // C_3 and C_4 (two doubles, ldfpd)
+data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1 (ldfe)
+data8 0xf5fdeffc162c7541, 0x00003ffc // C_2 (ldfe)
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+// 2^{0.00000 b6 b7 b8 b9 b10}
+data8 0x8000000000000000, 0x8016302f17467628
+data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
+data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
+data8 0x80855ad965e88b83, 0x809ba2264dada76a
+data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
+data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
+data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
+data8 0x813801881d886f7b, 0x814e67cceb90502c
+data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
+data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
+data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
+data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
+data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
+data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
+data8 0x8272fb97b2a5894c, 0x828998760d01faf3
+data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
+//
+// 2^{0.b1 b2 b3 b4 b5}
+data8 0x8000000000000000, 0x82cd8698ac2ba1d7
+data8 0x85aac367cc487b14, 0x88980e8092da8527
+data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
+data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
+data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
+data8 0x9ef5326091a111ad, 0xa27043030c496818
+data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
+data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
+data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
+data8 0xbd08a39f580c36be, 0xc12c4cca66709456
+data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
+data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
+data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
+data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
+data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
+data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
+LOCAL_OBJECT_END(T_table)
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(exp10)
+// Argument x is read from f8 (classified, then normalized below).
+
+{.mfi
+ alloc r32= ar.pfs, 1, 4, 4, 0
+ // will continue only for non-zero normal/denormal numbers
+ fclass.nm.unc p12, p7= f8, 0x1b
+ mov GR_BIAS53= 0xffff+63-10 // biased exponent of 2^{63-10}
+}
+{.mlx
+ // GR_TBL_START= pointer to log2(10), C_1...C_4 followed by T_table
+ addl GR_TBL_START= @ltoff(poly_coeffs), gp
+ movl GR_ROUNDVAL= 0x3fc00000 // 1.5 (SP)
+}
+;;
+
+{.mfi
+ ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
+ fcmp.lt.s1 p6, p8= f8, f0 // X<0 ? (p6: x<0, p8: otherwise)
+ nop.i 0
+}
+;;
+
+{.mlx
+ setf.exp FR_2P53= GR_BIAS53 // 2^{63-10}
+ movl GR_UF_LIMIT= 0xc07439b746e36b52 // (-2^10-51) / log2(10)
+}
+{.mlx
+ setf.s FR_ROUNDVAL= GR_ROUNDVAL
+ movl GR_OF_LIMIT= 0x40734413509f79fe // Overflow threshold
+}
+;;
+
+{.mlx
+ ldfe FR_LOG2_10= [ GR_COEFF_START ], 16 // load log2(10)*2^(10-63)
+ movl GR_SNORM_LIMIT= 0xc0733a7146f72a41 // Smallest normal threshold
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ (p12) br.cond.spnt SPECIAL_exp10 // Branch if nan, inf, zero
+}
+;;
+
+{.mmf
+ ldfe FR_L2_10_low= [ GR_COEFF_START ], 16 // load log2(10)_low
+ setf.d FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
+ fma.s0 f8= f8, f1, f0 // normalize x
+}
+;;
+
+{.mfi
+ ldfpd FR_COEFF3, FR_COEFF4= [ GR_COEFF_START ], 16 // load C_3, C_4
+ (p8) fcvt.fx.s1 FR_int_x = f8 // Convert x to integer
+ nop.i 0
+}
+{.mfi
+ setf.d FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
+ fma.s1 FR_KF0= f8, FR_LOG2_10, FR_ROUNDVAL // y= (x*log2(10)*2^10 +
+ // 1.5*2^63) * 2^(-63)
+ mov GR_EXP_CORR= 0xffff-126 // bias-2*63
+}
+;;
+
+{.mfi
+ setf.d FR_SNORM_LIMIT= GR_SNORM_LIMIT // Set smallest normal limit
+ fma.s1 FR_L2_10_high= FR_LOG2_10, FR_2P53, f0 // FR_L2_10_high= log2(10)_hi
+ nop.i 0
+}
+;;
+
+{.mfi
+ ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
+ fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)*2^(10-63)
+ mov GR_MASK= 1023 // low 10 bits: f_high and f_low
+}
+;;
+
+{.mfi
+ ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2
+ fma.s1 FR_LOG2_10= f8, FR_L2_10_high, f0 // y0= x*log2(10)_hi
+ mov GR_MASK_low= 31 // low 5 bits: f_low
+}
+;;
+
+{.mlx
+ getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
+ (p8) movl GR_exact_limit= 0x41b00000 // Largest x for exact result,
+ // +22.0
+}
+;;
+
+{.mfi
+ add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
+ fcmp.gt.s1 p12, p7= f8, FR_OF_LIMIT // x>overflow threshold ?
+ nop.i 0
+}
+;;
+
+{.mfi
+ (p8) setf.s FR_exact_limit = GR_exact_limit // Largest x for exact result
+ (p8) fcvt.xf FR_int_x = FR_int_x // Integral part of x
+ shr GR_K= GR_KF0, 10 // K
+}
+{.mfi
+ and GR_F_high= GR_MASK, GR_KF0 // f_high*32 + f_low (low 10 bits)
+ fnma.s1 FR_R= FR_KF, FR_2P53, FR_LOG2_10 // r= x*log2(10)-2^{63-10}*
+ // [ (K+f)*2^{10-63} ]
+ and GR_F_low= GR_KF0, GR_MASK_low // f_low
+}
+;;
+
+{.mmi
+ shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
+ add GR_BIAS= GR_K, GR_EXP_CORR // GR_BIAS= K+bias-2*63
+ shr GR_Fh= GR_F_high, 5 // f_high
+}
+;;
+
+{.mfi
+ setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
+ (p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x < underflow limit ?
+#undef libm_alias_finite
+#define libm_alias_finite(a, b)
+#include
diff --git a/sysdeps/ia64/fpu/e_exp10l.S b/sysdeps/ia64/fpu/e_exp10l.S
new file mode 100644
index 0000000000..20a35c02dd
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_exp10l.S
@@ -0,0 +1,814 @@
+.file "exp10l.s"
+
+
+// Copyright (c) 2000 - 2004, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 08/25/00 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/06/03 Reordered header: .section, .global, .proc, .align
+// 05/08/03 Reformatted assembly source; corrected overflow result for round to
+// -inf and round to zero; exact results now don't set inexact flag
+// 12/16/04 Call error handling on underflow.
+//
+// API
+//==============================================================
+// long double exp10l(long double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// Let x= (K + f + r)/log2(10), where
+// K is an integer, f= 0.b1 b2... b8 (f>= 0),
+// and |r|<2^{-9}
+// T is a table that stores 2^f (256 entries) rounded to
+// double extended precision (only mantissa is stored)
+// D stores (2^f/T [ f ] - 1), rounded to single precision
+//
+// 10^x is approximated as
+// 2^K * T [ f ] * ((1+c1*r+c2*r^2+...+c6*r^6)*(1+c1*e)+D [ f ] ),
+// where e= log2(10)_lo*x+(log2(10)_hi*x-RN(log2(10)_hi*x))
+//
+
+
+
+// Special values
+//==============================================================
+// exp10(0)= 1
+// exp10(+inf)= inf
+// exp10(-inf)= 0
+//
+
+
+// Registers used
+//==============================================================
+// f6-f15, f32-f63
+// r14-r30, r32-r40
+// p6-p8, p11-p14
+//
+
+#include
+
+
+ FR_X = f10 // NOTE(review): f10 is also FR_LOG10; confirm it holds x at error time
+ FR_Y = f1 // f1, i.e. the constant 1.0
+ FR_RESULT = f8
+
+ FR_COEFF1 = f6
+ FR_COEFF2 = f7 // NOTE(review): FR_COEFF2 is assigned again (f47) further down
+ FR_KF0 = f9
+ FR_LOG10 = f10 // same register as FR_X
+ FR_CONST1 = f11
+ FR_XL10 = f12
+ FR_COEFF3 = f13
+ FR_COEFF4 = f14
+ FR_UF_TEST = f15
+ FR_OF_TEST = f32
+ FR_L10_LOW = f33
+ FR_COEFF5 = f34
+ FR_COEFF6 = f35
+ FR_L10 = f36
+ FR_C_L10 = f37
+ FR_XL10_H = f38
+ FR_XL10_L = f39
+ FR_KF = f40
+ FR_E = f41
+ FR_T = f42
+ FR_D = f43
+ FR_EXP_M_63 = f44
+ FR_R = f45
+ FR_E1 = f46
+ FR_COEFF2 = f47 // NOTE(review): second assignment of FR_COEFF2 (f7 above) - confirm intent
+ FR_P34 = f48
+ FR_P56 = f49
+ FR_R2 = f50
+ FR_RE = f51
+ FR_D1 = f52
+ FR_P36 = f53
+ FR_R3E = f54
+ FR_P1 = f55
+ FR_P = f56
+ FR_T1 = f57
+ FR_XINT = f58
+ FR_XINTF = f59
+ FR_4 = f60
+ FR_28 = f61
+ FR_32 = f62
+ FR_SNORM_LIMIT = f63
+
+
+ GR_ADDR0 = r14
+ GR_D_ADDR = r15
+ GR_ADDR = r16
+ GR_B63 = r17
+ GR_KBITS = r18
+ GR_F = r19
+ GR_K = r20
+ GR_D = r21
+ GR_BM63 = r22
+ GR_T = r23
+ GR_CONST1 = r24
+ GR_EMIN = r25
+ GR_CONST2 = r26
+ GR_BM8 = r27
+ GR_SREG = r28 // receives ar.pfs from the alloc at function entry
+ GR_4_BIAS = r29
+ GR_32_BIAS = r30
+
+ GR_SAVE_B0 = r33 // r33-r36: the 4 locals of "alloc ..., 1, 4, 4, 0"
+ GR_SAVE_PFS = r34
+ GR_SAVE_GP = r35
+ GR_SAVE_SP = r36
+
+ GR_Parameter_X = r37 // r37-r40: the 4 output registers of the alloc
+ GR_Parameter_Y = r38
+ GR_Parameter_RESULT= r39
+ GR_Parameter_TAG = r40
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// 128 bytes, read sequentially by exp10l with post-increment 16.
+ data8 0xd49a784bcd1b8afe, 0x00004008 // log2(10)*2^8 (ldfe)
+ data8 0x9a209a84fbcff798, 0x0000400b // overflow threshold (ldfe)
+ data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1 (ldfe)
+ data8 0xf5fdeffc162c7541, 0x00003ffc // C_2 (ldfe)
+ data8 0x3fac6b08d704a0c0 // C_3 (double, ldfpd pair with C_4)
+ data8 0x3f83b2ab6fba4e77 // C_4
+ data8 0x3f55d87fe78a6731 // C_5 (double, ldfpd pair with C_6)
+ data8 0x3f2430912f86c787 // C_6
+ data8 0x9257edfe9b5fb698, 0x00003fbf // log2(10)_low (bits 64...127) (ldfe)
+ data8 0x9a1bc98027a81918, 0x0000c00b // Smallest normal threshold (ldfe)
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+ // 2^{0.b1 b2 b3 b4 b5 b6 b7 b8}
+ data8 0x8000000000000000, 0x8058d7d2d5e5f6b1
+ data8 0x80b1ed4fd999ab6c, 0x810b40a1d81406d4
+ data8 0x8164d1f3bc030773, 0x81bea1708dde6056
+ data8 0x8218af4373fc25ec, 0x8272fb97b2a5894c
+ data8 0x82cd8698ac2ba1d7, 0x83285071e0fc4547
+ data8 0x8383594eefb6ee37, 0x83dea15b9541b132
+ data8 0x843a28c3acde4046, 0x8495efb3303efd30
+ data8 0x84f1f656379c1a29, 0x854e3cd8f9c8c95d
+ data8 0x85aac367cc487b15, 0x86078a2f23642a9f
+ data8 0x8664915b923fba04, 0x86c1d919caef5c88
+ data8 0x871f61969e8d1010, 0x877d2afefd4e256c
+ data8 0x87db357ff698d792, 0x88398146b919f1d4
+ data8 0x88980e8092da8527, 0x88f6dd5af155ac6b
+ data8 0x8955ee03618e5fdd, 0x89b540a7902557a4
+ data8 0x8a14d575496efd9a, 0x8a74ac9a79896e47
+ data8 0x8ad4c6452c728924, 0x8b3522a38e1e1032
+ data8 0x8b95c1e3ea8bd6e7, 0x8bf6a434adde0085
+ data8 0x8c57c9c4646f4dde, 0x8cb932c1bae97a95
+ data8 0x8d1adf5b7e5ba9e6, 0x8d7ccfc09c50e2f8
+ data8 0x8ddf042022e69cd6, 0x8e417ca940e35a01
+ data8 0x8ea4398b45cd53c0, 0x8f073af5a2013520
+ data8 0x8f6a8117e6c8e5c4, 0x8fce0c21c6726481
+ data8 0x9031dc431466b1dc, 0x9095f1abc540ca6b
+ data8 0x90fa4c8beee4b12b, 0x915eed13c89689d3
+ data8 0x91c3d373ab11c336, 0x9228ffdc10a051ad
+ data8 0x928e727d9531f9ac, 0x92f42b88f673aa7c
+ data8 0x935a2b2f13e6e92c, 0x93c071a0eef94bc1
+ data8 0x9426ff0fab1c04b6, 0x948dd3ac8ddb7ed3
+ data8 0x94f4efa8fef70961, 0x955c5336887894d5
+ data8 0x95c3fe86d6cc7fef, 0x962bf1cbb8d97560
+ data8 0x96942d3720185a00, 0x96fcb0fb20ac4ba3
+ data8 0x97657d49f17ab08e, 0x97ce9255ec4357ab
+ data8 0x9837f0518db8a96f, 0x98a1976f7597e996
+ data8 0x990b87e266c189aa, 0x9975c1dd47518c77
+ data8 0x99e0459320b7fa65, 0x9a4b13371fd166ca
+ data8 0x9ab62afc94ff864a, 0x9b218d16f441d63d
+ data8 0x9b8d39b9d54e5539, 0x9bf93118f3aa4cc1
+ data8 0x9c6573682ec32c2d, 0x9cd200db8a0774cb
+ data8 0x9d3ed9a72cffb751, 0x9dabfdff6367a2aa
+ data8 0x9e196e189d472420, 0x9e872a276f0b98ff
+ data8 0x9ef5326091a111ae, 0x9f6386f8e28ba651
+ data8 0x9fd228256400dd06, 0xa041161b3d0121be
+ data8 0xa0b0510fb9714fc2, 0xa11fd9384a344cf7
+ data8 0xa18faeca8544b6e4, 0xa1ffd1fc25cea188
+ data8 0xa27043030c496819, 0xa2e102153e918f9e
+ data8 0xa3520f68e802bb93, 0xa3c36b345991b47c
+ data8 0xa43515ae09e6809e, 0xa4a70f0c95768ec5
+ data8 0xa5195786be9ef339, 0xa58bef536dbeb6ee
+ data8 0xa5fed6a9b15138ea, 0xa6720dc0be08a20c
+ data8 0xa6e594cfeee86b1e, 0xa7596c0ec55ff55b
+ data8 0xa7cd93b4e965356a, 0xa8420bfa298f70d1
+ data8 0xa8b6d5167b320e09, 0xa92bef41fa77771b
+ data8 0xa9a15ab4ea7c0ef8, 0xaa1717a7b5693979
+ data8 0xaa8d2652ec907629, 0xab0386ef48868de1
+ data8 0xab7a39b5a93ed337, 0xabf13edf162675e9
+ data8 0xac6896a4be3fe929, 0xace0413ff83e5d04
+ data8 0xad583eea42a14ac6, 0xadd08fdd43d01491
+ data8 0xae493452ca35b80e, 0xaec22c84cc5c9465
+ data8 0xaf3b78ad690a4375, 0xafb51906e75b8661
+ data8 0xb02f0dcbb6e04584, 0xb0a957366fb7a3c9
+ data8 0xb123f581d2ac2590, 0xb19ee8e8c94feb09
+ data8 0xb21a31a66618fe3b, 0xb295cff5e47db4a4
+ data8 0xb311c412a9112489, 0xb38e0e38419fae18
+ data8 0xb40aaea2654b9841, 0xb487a58cf4a9c180
+ data8 0xb504f333f9de6484, 0xb58297d3a8b9f0d2
+ data8 0xb60093a85ed5f76c, 0xb67ee6eea3b22b8f
+ data8 0xb6fd91e328d17791, 0xb77c94c2c9d725e9
+ data8 0xb7fbefca8ca41e7c, 0xb87ba337a1743834
+ data8 0xb8fbaf4762fb9ee9, 0xb97c143756844dbf
+ data8 0xb9fcd2452c0b9deb, 0xba7de9aebe5fea09
+ data8 0xbaff5ab2133e45fb, 0xbb81258d5b704b6f
+ data8 0xbc034a7ef2e9fb0d, 0xbc85c9c560e7b269
+ data8 0xbd08a39f580c36bf, 0xbd8bd84bb67ed483
+ data8 0xbe0f6809860993e2, 0xbe935317fc378238
+ data8 0xbf1799b67a731083, 0xbf9c3c248e2486f8
+ data8 0xc0213aa1f0d08db0, 0xc0a6956e8836ca8d
+ data8 0xc12c4cca66709456, 0xc1b260f5ca0fbb33
+ data8 0xc238d2311e3d6673, 0xc2bfa0bcfad907c9
+ data8 0xc346ccda24976407, 0xc3ce56c98d21b15d
+ data8 0xc4563ecc5334cb33, 0xc4de8523c2c07baa
+ data8 0xc5672a115506dadd, 0xc5f02dd6b0bbc3d9
+ data8 0xc67990b5aa245f79, 0xc70352f04336c51e
+ data8 0xc78d74c8abb9b15d, 0xc817f681416452b2
+ data8 0xc8a2d85c8ffe2c45, 0xc92e1a9d517f0ecc
+ data8 0xc9b9bd866e2f27a3, 0xca45c15afcc72624
+ data8 0xcad2265e4290774e, 0xcb5eecd3b38597c9
+ data8 0xcbec14fef2727c5d, 0xcc799f23d11510e5
+ data8 0xcd078b86503dcdd2, 0xcd95da6a9ff06445
+ data8 0xce248c151f8480e4, 0xceb3a0ca5dc6a55d
+ data8 0xcf4318cf191918c1, 0xcfd2f4683f94eeb5
+ data8 0xd06333daef2b2595, 0xd0f3d76c75c5db8d
+ data8 0xd184df6251699ac6, 0xd2164c023056bcab
+ data8 0xd2a81d91f12ae45a, 0xd33a5457a3029054
+ data8 0xd3ccf099859ac379, 0xd45ff29e0972c561
+ data8 0xd4f35aabcfedfa1f, 0xd5872909ab75d18a
+ data8 0xd61b5dfe9f9bce07, 0xd6aff9d1e13ba2fe
+ data8 0xd744fccad69d6af4, 0xd7da67311797f56a
+ data8 0xd870394c6db32c84, 0xd9067364d44a929c
+ data8 0xd99d15c278afd7b6, 0xda3420adba4d8704
+ data8 0xdacb946f2ac9cc72, 0xdb63714f8e295255
+ data8 0xdbfbb797daf23755, 0xdc9467913a4f1c92
+ data8 0xdd2d818508324c20, 0xddc705bcd378f7f0
+ data8 0xde60f4825e0e9124, 0xdefb4e1f9d1037f2
+ data8 0xdf9612deb8f04420, 0xe031430a0d99e627
+ data8 0xe0ccdeec2a94e111, 0xe168e6cfd3295d23
+ data8 0xe2055afffe83d369, 0xe2a23bc7d7d91226
+ data8 0xe33f8972be8a5a51, 0xe3dd444c46499619
+ data8 0xe47b6ca0373da88d, 0xe51a02ba8e26d681
+ data8 0xe5b906e77c8348a8, 0xe658797368b3a717
+ data8 0xe6f85aaaee1fce22, 0xe798aadadd5b9cbf
+ data8 0xe8396a503c4bdc68, 0xe8da9958464b42ab
+ data8 0xe97c38406c4f8c57, 0xea1e4756550eb27b
+ data8 0xeac0c6e7dd24392f, 0xeb63b74317369840
+ data8 0xec0718b64c1cbddc, 0xecaaeb8ffb03ab41
+ data8 0xed4f301ed9942b84, 0xedf3e6b1d418a491
+ data8 0xee990f980da3025b, 0xef3eab20e032bc6b
+ data8 0xefe4b99bdcdaf5cb, 0xf08b3b58cbe8b76a
+ data8 0xf13230a7ad094509, 0xf1d999d8b7708cc1
+ data8 0xf281773c59ffb13a, 0xf329c9233b6bae9c
+ data8 0xf3d28fde3a641a5b, 0xf47bcbbe6db9fddf
+ data8 0xf5257d152486cc2c, 0xf5cfa433e6537290
+ data8 0xf67a416c733f846e, 0xf7255510c4288239
+ data8 0xf7d0df730ad13bb9, 0xf87ce0e5b2094d9c
+ data8 0xf92959bb5dd4ba74, 0xf9d64a46eb939f35
+ data8 0xfa83b2db722a033a, 0xfb3193cc4227c3f4
+ data8 0xfbdfed6ce5f09c49, 0xfc8ec01121e447bb
+ data8 0xfd3e0c0cf486c175, 0xfdedd1b496a89f35
+ data8 0xfe9e115c7b8f884c, 0xff4ecb59511ec8a5
+LOCAL_OBJECT_END(T_table)
+
+
+LOCAL_OBJECT_START(D_table)
+ data4 0x00000000, 0x9f55c08f, 0x1e93ffa3, 0x1dcd43a8
+ data4 0x1f751f79, 0x9f3cdd88, 0x9f43d155, 0x1eda222c
+ data4 0x1ef35513, 0x9f597895, 0x9e698881, 0x1ec71073
+ data4 0x1e50e371, 0x9dc01e19, 0x1de74133, 0x1e2f028c
+ data4 0x9edefb47, 0x1ebbac48, 0x9e8b0330, 0x9e9e9314
+ data4 0x1edc1d11, 0x1f098529, 0x9f52827c, 0x1f50050d
+ data4 0x1f301e8e, 0x1f5b64d1, 0x9f45e3ee, 0x9ef64d6d
+ data4 0x1d6ec5e8, 0x9e61ad9a, 0x1d44ccbb, 0x9e4a8bbb
+ data4 0x9cf11576, 0x9dcce7e7, 0x9d02ac90, 0x1f26ccf0
+ data4 0x9f0877c6, 0x9ddd62ae, 0x9f4b7fc3, 0x1ea8ef6b
+ data4 0x1ea4378d, 0x1ef6fc38, 0x1db99fd9, 0x1f22bf6f
+ data4 0x1f53e172, 0x1e85504a, 0x9f37cc75, 0x1f0c5e17
+ data4 0x1dde8aac, 0x9cb42bb2, 0x1e153cd7, 0x1eb62bba
+ data4 0x9e9b941b, 0x9ea80e3c, 0x1f508823, 0x1ec3fd36
+ data4 0x1e9ffaa1, 0x1e21e2eb, 0x9d948b1d, 0x9e8ac93a
+ data4 0x1ef7ee6f, 0x9e80dda3, 0x1f0814be, 0x1dc5ddfe
+ data4 0x1eedb9d1, 0x9f2aaa26, 0x9ea5b0fc, 0x1edf702e
+ data4 0x9e391201, 0x1f1316bb, 0x1ea27fb7, 0x9e05ed18
+ data4 0x9f199ed2, 0x1ee7fd7c, 0x1f003db6, 0x9eac3793
+ data4 0x9e5b8c10, 0x9f3af17c, 0x1bc9a8be, 0x1ee3c004
+ data4 0x9f19b1b2, 0x9f242ce9, 0x9ce67dd1, 0x9e4f6275
+ data4 0x1e20742c, 0x1eb9328a, 0x9f477153, 0x1d969718
+ data4 0x9f1e6c43, 0x1f2f67f4, 0x9f39c7e4, 0x9e3c4feb
+ data4 0x1da3956b, 0x9e7c685d, 0x1f280911, 0x9f0d8afb
+ data4 0x1e314b40, 0x9eb4f250, 0x9f1a34ad, 0x1ef5d5e7
+ data4 0x9f145496, 0x1e604827, 0x9f1e5195, 0x1e9c1fc0
+ data4 0x1efde521, 0x1e69b385, 0x1f316830, 0x9f244eae
+ data4 0x1f1787ec, 0x9e939971, 0x1f0bb393, 0x9f0511d6
+ data4 0x1ed919de, 0x1d8b7b28, 0x1e5ca4a9, 0x1e7c357b
+ data4 0x9e3ff8e8, 0x1eef53b5, 0x9ed22ed7, 0x1f16659b
+ data4 0x9f2db102, 0x9e2c6a78, 0x1f328d7d, 0x9f2fec3c
+ data4 0x1eb395bd, 0x9f242b84, 0x9e2683e6, 0x1ed71e68
+ data4 0x1efd1df5, 0x9e9eeafd, 0x9ed2249c, 0x1eef129a
+ data4 0x1d1ea44c, 0x9e81f7ff, 0x1eaf77c9, 0x9ee7a285
+ data4 0x1e1864ed, 0x9ee7edbb, 0x9e15a27d, 0x9ae61655
+ data4 0x1f1ff1a2, 0x1da29755, 0x9e5f46fb, 0x1e901236
+ data4 0x9eecfb9b, 0x9f204d2f, 0x1ec64685, 0x9eb809bd
+ data4 0x9e0026c5, 0x1d9f1da1, 0x1f142b49, 0x9f20f22e
+ data4 0x1f24b067, 0x1f185a4c, 0x9f09765c, 0x9ece902f
+ data4 0x1e2ca5db, 0x1e6de464, 0x9f071f67, 0x1f1518c3
+ data4 0x1ea13ded, 0x1f0b8414, 0x1edb6ad4, 0x9e548740
+ data4 0x9ea10efb, 0x1ee48a60, 0x1e7954c5, 0x9edad013
+ data4 0x9f21517d, 0x9e9b6e0c, 0x9ee7f9a6, 0x9ebd4298
+ data4 0x9d65b24e, 0x1eed751f, 0x9f1573ea, 0x9d430377
+ data4 0x9e13fc0c, 0x1e47008a, 0x1e3d5c1d, 0x1ef41a91
+ data4 0x9e4a4ef7, 0x9e952f18, 0x1d620566, 0x1d9b8d33
+ data4 0x1db06247, 0x1e94b31e, 0x1f0730ad, 0x9d79ffb4
+ data4 0x1ed64d51, 0x9e91fd11, 0x9e28d35a, 0x9dea0ed9
+ data4 0x1e891def, 0x9ee28ac0, 0x1e1db99b, 0x9ee1ce38
+ data4 0x9bdd9bca, 0x1eb72cb9, 0x9e8c53c6, 0x1e0df6ca
+ data4 0x1e8f2ccd, 0x9e9b0886, 0x1eeb3bc7, 0x1ec7e772
+ data4 0x9e210776, 0x9daf246c, 0x1ea1f151, 0x1ece4dc6
+ data4 0x1ce741c8, 0x1ed3c88f, 0x9ec9a4fd, 0x9e0c8d30
+ data4 0x1d2fbb26, 0x9ef212a7, 0x1ee44f1c, 0x9e445550
+ data4 0x1e075f77, 0x9d9291a3, 0x1f09c2ee, 0x9e012c88
+ data4 0x1f057d62, 0x9e7bb0dc, 0x9d8758ee, 0x1ee8d6c1
+ data4 0x9e509a57, 0x9e4ca7b7, 0x1e2cb341, 0x9ec35106
+ data4 0x1ecf3baf, 0x1e11781c, 0x1ea0cc78, 0x1eb75ca6
+ data4 0x1e961e1a, 0x1eb88853, 0x1e7abf50, 0x1ee38704
+ data4 0x9dc5ab0f, 0x1afe197b, 0x9ec07523, 0x9d9b7f78
+ data4 0x1f011618, 0x1ed43b0b, 0x9f035945, 0x9e3fd014
+ data4 0x9bbda5cd, 0x9e83f8ab, 0x1e58a928, 0x1e392d61
+ data4 0x1efdbb52, 0x1ee310a8, 0x9ec7ecc1, 0x1e8c9ed6
+ data4 0x9ef82dee, 0x9e70545b, 0x9ea53fc4, 0x1e40f419
+LOCAL_OBJECT_END(D_table)
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(exp10l)
+
+{.mfi
+ alloc GR_SREG = ar.pfs, 1, 4, 4, 0
+ // will continue only for normal/denormal numbers
+ fclass.nm.unc p12, p7 = f8, 0x1b
+ // GR_ADDR0 = pointer to log2(10), C_1...C_6 followed by T_table
+ addl GR_ADDR0 = @ltoff(poly_coeffs), gp ;;
+}
+
+{.mfi
+ // load start address for C_1...C_6 followed by T_table
+ ld8 GR_ADDR0 = [ GR_ADDR0 ]
+ // X<0 ? (p6: x<0, p8: otherwise)
+ fcmp.lt.s1 p6, p8 = f8, f0
+ // GR_BM8 = bias-8
+ mov GR_BM8 = 0xffff-8
+}
+{.mlx
+ nop.m 0
+ // GR_EMIN = (-2^14-62)*2^{8}
+ movl GR_EMIN = 0xca807c00 ;;
+}
+
+{.mmb
+ // FR_CONST1 = 2^{-8}
+ setf.exp FR_CONST1 = GR_BM8
+ // load log2(10)*2^8
+ ldfe FR_LOG10 = [ GR_ADDR0 ], 16
+ (p12) br.cond.spnt SPECIAL_EXP10 ;;
+}
+
+{.mmf
+ setf.s FR_UF_TEST = GR_EMIN
+ // load overflow threshold
+ ldfe FR_OF_TEST = [ GR_ADDR0 ], 16
+ // normalize x
+ fma.s0 f8 = f8, f1, f0 ;;
+}
+
+{.mmi
+ // load C_1
+ ldfe FR_COEFF1 = [ GR_ADDR0 ], 16 ;;
+ // load C_2
+ ldfe FR_COEFF2 = [ GR_ADDR0 ], 16
+ nop.i 0 ;;
+}
+
+{.mmf
+ // GR_D_ADDR = pointer to D table (GR_ADDR0 is poly_coeffs+64 here)
+ add GR_D_ADDR = 2048-64+96+32, GR_ADDR0
+ // load C_3, C_4
+ ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR0 ], 16
+ // y = x*log2(10)*2^8
+ fma.s1 FR_XL10 = f8, FR_LOG10, f0 ;;
+}
+
+{.mfi
+ // load C_5, C_6
+ ldfpd FR_COEFF5, FR_COEFF6 = [ GR_ADDR0 ], 16
+ // get int(x)
+ fcvt.fx.trunc.s1 FR_XINT = f8
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // FR_L10 = log2(10) (= FR_LOG10 * 2^{-8})
+ fma.s1 FR_L10 = FR_LOG10, FR_CONST1, f0
+ nop.i 0 ;;
+}
+
+{.mfi
+ // load log2(10)_low
+ ldfe FR_L10_LOW = [ GR_ADDR0 ], 16
+ // y0 = x*log2(10) = x*log2(10)_hi
+ fma.s1 FR_LOG10 = f8, FR_L10, f0
+ mov GR_EMIN = 0xffff-63 // GR_EMIN reused: bias-63
+}
+{.mfi
+ mov GR_32_BIAS = 0xffff + 5 // biased exponent of 32
+ // (K+f)*2^8 = round_to_int(y)
+ fcvt.fx.s1 FR_KF0 = FR_XL10
+ mov GR_4_BIAS = 0xffff + 2;; // biased exponent of 4
+}
+
+{.mfi
+ // load smallest normal limit
+ ldfe FR_SNORM_LIMIT = [ GR_ADDR0 ], 16
+ // x>overflow threshold ?
+ fcmp.gt.s1 p12, p7 = f8, FR_OF_TEST
+ nop.i 0 ;;
+}
+
+{.mfi
+ setf.exp FR_32 = GR_32_BIAS
+ // x >= smallest normal limit ?
+ fcmp.ge.s1 p11, p0 = f8, FR_SNORM_LIMIT
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // P36 = P34+r2*P56
+ fma.s1 FR_COEFF4 = FR_COEFF5, FR_COEFF3, FR_COEFF4
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r'*r2 (result kept in FR_COEFF3)
+ fma.s1 FR_COEFF3 = FR_COEFF3, FR_KF0, f0
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // is input below 28.0 ?
+ (p13) fcmp.lt.s1 p13, p14 = f8, FR_28
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P' = P12*r'+D'
+ fma.s1 FR_COEFF2 = FR_COEFF2, FR_KF0, FR_OF_TEST
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // P = P'+r3*P36
+ fma.s1 FR_COEFF3 = FR_COEFF3, FR_COEFF4, FR_COEFF2
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // T = 2^{K-63}*T
+ fma.s1 FR_UF_TEST = FR_UF_TEST, FR_XL10, f0
+ nop.i 0 ;;
+}
+
+.pred.rel "mutex",p13,p14
+{.mfi
+ nop.m 0
+ (p13) fma.s1 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
+ nop.i 0
+}
+{.mfb
+ nop.m 0
+ // result = T+T*P
+ (p14) fma.s0 f8 = FR_COEFF3, FR_UF_TEST, FR_UF_TEST
+ // return
+ (p11) br.ret.sptk b0 ;; // return, if result normal
+}
+
+// Here if result in denormal range (and not zero)
+{.mib
+ nop.m 0
+ mov GR_Parameter_TAG= 264
+ br.cond.sptk __libm_error_region // Branch to error handling
+}
+;;
+
+SPECIAL_EXP10:
+
+{.mfi
+ nop.m 0
+ // x = -Infinity ?
+ fclass.m p6, p0 = f8, 0x22
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // x = +Infinity ?
+ fclass.m p7, p0 = f8, 0x21
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // x = +/-Zero ?
+ fclass.m p8, p0 = f8, 0x7
+ nop.i 0
+}
+{.mfb
+ nop.m 0
+ // exp10(-Infinity) = 0
+ (p6) mov f8 = f0
+ (p6) br.ret.spnt b0 ;;
+}
+
+{.mfb
+ nop.m 0
+ // exp10(+Infinity) = +Infinity
+ nop.f 0
+ (p7) br.ret.spnt b0 ;;
+}
+
+{.mfb
+ nop.m 0
+ // exp10(+/-0) = 1
+ (p8) mov f8 = f1
+ (p8) br.ret.spnt b0 ;;
+}
+
+{.mfb
+ nop.m 0
+ // Remaining cases: NaNs
+ fma.s0 f8 = f8, f1, f0
+ br.ret.sptk b0 ;;
+}
+
+
+OUT_RANGE_EXP10:
+
+// underflow: p6 = 1
+// overflow: p8 = 1
+
+.pred.rel "mutex",p6,p8
+{.mmi
+ (p8) mov GR_CONST1 = 0x1fffe // overflow: very large exponent field
+ (p6) mov GR_CONST1 = 1 // underflow: very small exponent field
+ nop.i 0
+}
+;;
+
+{.mii
+ setf.exp FR_KF0 = GR_CONST1
+ (p8) mov GR_Parameter_TAG = 165 // tag passed on the overflow path
+ (p6) mov GR_Parameter_TAG = 264 // tag passed on the underflow path
+}
+;;
+
+{.mfb
+ nop.m 999
+ fma.s0 f8 = FR_KF0, FR_KF0, f0 // Create overflow/underflow
+ br.cond.sptk __libm_error_region // Branch to error handling
+}
+;;
+
+GLOBAL_IEEE754_END(exp10l)
+libm_alias_ldouble_other (__exp10, exp10)
+#if SHLIB_COMPAT (libm, GLIBC_2_1, GLIBC_2_27)
+compat_symbol (libm, exp10l, pow10l, GLIBC_2_2)
+#endif
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{.mfi
+ add GR_Parameter_Y = -32, sp // Parameter 2 address: old sp-32 (= new sp+32)
+ nop.f 0
+.save ar.pfs, GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
+}
+
+{.mfi
+.fframe 64
+ add sp = -64, sp // Allocate a 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP = gp ;; // Save gp
+}
+
+{.mmi
+ stfe [ GR_Parameter_Y ] = FR_Y, 16 // Store Parameter 2 at sp+32; pointer advances to sp+48
+ add GR_Parameter_X = 16, sp // Parameter 1 address: sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0 = b0 ;; // Save b0
+}
+
+.body
+{.mib
+ stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 at sp+16
+ add GR_Parameter_RESULT = 0, GR_Parameter_Y // Parameter 3 (result) address: sp+48
+ nop.b 0
+}
+{.mib
+ stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 (result) at sp+48
+ add GR_Parameter_Y = -16, GR_Parameter_Y // Rewind to Parameter 2 address (sp+32)
+ br.call.sptk b0 = __libm_error_support# ;; // Call error handling function
+}
+
+{.mmi
+ add GR_Parameter_RESULT = 48, sp // Recompute result slot address (sp+48) after the call
+ nop.m 0
+ nop.i 0 ;;
+}
+
+{.mmi
+ ldfe f8 = [ GR_Parameter_RESULT ] // Reload the result slot into the return register f8
+.restore sp
+ add sp = 64, sp // Release the 64-byte frame
+ mov b0 = GR_SAVE_B0 ;; // Restore return address
+}
+
+{.mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 ;; // Return
+}
+
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#, @function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_exp2.S b/sysdeps/ia64/fpu/e_exp2.S
new file mode 100644
index 0000000000..c2a116e2d2
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_exp2.S
@@ -0,0 +1,570 @@
+.file "exp2.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 08/25/00 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 09/05/02 Improved performance
+// 01/17/03 Fixed to call error support when x=1024.0
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//==============================================================
+// double exp2(double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// Let x= (K + fh + fl + r), where
+// K is an integer, fh= 0.b1 b2 b3 b4 b5,
+// fl= 2^{-5}* 0.b6 b7 b8 b9 b10 (fh, fl >= 0),
+// and |r|<2^{-11}
+// Th is a table that stores 2^fh (32 entries) rounded to
+// double extended precision (only mantissa is stored)
+// Tl is a table that stores 2^fl (32 entries) rounded to
+// double extended precision (only mantissa is stored)
+//
+// 2^x is approximated as
+// 2^K * Th [ fh ] * Tl [ fl ] * (1+c1*r+c2*r^2+c3*r^3+c4*r^4)
+
+// Note: We use the following trick to speed up conversion from FP to integer:
+//
+// Let x = K + r, where K is an integer, and |r| <= 0.5
+// Let N be the number of significand bits for the FP format used
+// ( N=64 for double-extended, N=53 for double)
+//
+// Then let y = 1.5 * 2^(N-1) + x for RN mode
+// K = y - 1.5 * 2^(N-1)
+// r = x - K
+//
+// If we want to obtain the integer part and the first m fractional bits of x,
+// we can use the same trick, but with a constant of 1.5 * 2^(N-1-m):
+//
+// Let x = K + f + r
+// f = 0.b_1 b_2 ... b_m
+// |r| <= 2^(-m-1)
+//
+// Then let y = 1.5 * 2^(N-1-m) + x for RN mode
+// (K+f) = y - 1.5 * 2^(N-1-m)
+// r = x - (K+f)
+
+
+// Special values
+//==============================================================
+// exp2(0)= 1
+// exp2(+inf)= inf
+// exp2(-inf)= 0
+//
+
+// Registers used
+//==============================================================
+// r2-r3, r14-r40
+// f6-f15, f32-f45
+// p6-p8, p12
+//
+
+
+GR_TBL_START = r2 // address of the linkage-table entry for poly_coeffs
+GR_LOG_TBL = r3 // pointer to the 2^{f_high} half of T_table
+
+GR_OF_LIMIT = r14 // biased exponent of the overflow threshold
+GR_UF_LIMIT = r15 // single-precision bit pattern of the underflow threshold
+GR_EXP_CORR = r16 // exponent bias - 126
+GR_F_low = r17 // f_low table index (low 5 bits of round_to_int(y))
+GR_F_high = r18 // low 10 bits of round_to_int(y), shifted down to give GR_Fh
+GR_K = r19 // integer part K
+GR_Flow_ADDR = r20 // address of the 2^{f_low} table entry
+
+GR_BIAS = r21 // K + bias - 126, exponent field for FR_2_TO_K
+GR_Fh = r22 // f_high table index
+GR_Fh_ADDR = r23 // address of the 2^{f_high} table entry
+GR_EXPMAX = r24
+GR_EMIN = r25 // single-precision bit pattern of EMIN
+
+GR_ROUNDVAL = r26 // single-precision bit pattern of 1.5*2^(63-10)
+GR_MASK = r27 // 1023, mask for the 10 fractional index bits
+GR_KF0 = r28 // significand of y = x + 1.5*2^(63-10)
+GR_MASK_low = r29 // 31, mask for the f_low index
+GR_COEFF_START = r30 // walks through poly_coeffs, then serves as T_table base
+
+GR_SAVE_B0 = r33 // r33-r36 lie in the local area of the alloc 1,4,4 frame
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37 // r37-r40 are the four output registers of that frame
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1 // f1 is used as the constant 1.0 operand throughout this file
+FR_RESULT = f8 // f8 also carries the incoming argument x
+
+
+FR_COEFF1 = f6 // C_1
+FR_COEFF2 = f7 // C_2
+FR_R = f9 // reduced argument r = x - (K+f)
+
+FR_KF0 = f12 // y = x + 1.5*2^(63-10)
+FR_COEFF3 = f13 // C_3
+FR_COEFF4 = f14 // C_4
+FR_UF_LIMIT = f15 // underflow threshold
+
+FR_OF_LIMIT = f32 // overflow threshold
+FR_EXPMIN = f33
+FR_ROUNDVAL = f34 // 1.5*2^(63-10)
+FR_KF = f35 // K+f
+
+FR_2_TO_K = f36 // 2^{K-126}
+FR_T_low = f37 // 2^{f_low} significand loaded with ldf8
+FR_T_high = f38
+FR_P34 = f39
+FR_R2 = f40
+
+FR_P12 = f41
+FR_T_low_K = f42
+FR_P14 = f43
+FR_T = f44
+FR_P = f45
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// Laid out in load order: one ldfpd pair, then two ldfe values, 16 bytes each; T_table follows
+data8 0x3fac6b08d704a0c0, 0x3f83b2ab6fba4e77 // C_3 and C_4 (two doubles, read as a pair by ldfpd)
+data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1 (double-extended: significand, then sign/exponent)
+data8 0xf5fdeffc162c7541, 0x00003ffc // C_2 (double-extended: significand, then sign/exponent)
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+// 2^{0.00000 b6 b7 b8 b9 b10}
+data8 0x8000000000000000, 0x8016302f17467628
+data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
+data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
+data8 0x80855ad965e88b83, 0x809ba2264dada76a
+data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
+data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
+data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
+data8 0x813801881d886f7b, 0x814e67cceb90502c
+data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
+data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
+data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
+data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
+data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
+data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
+data8 0x8272fb97b2a5894c, 0x828998760d01faf3
+data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
+//
+// 2^{0.b1 b2 b3 b4 b5}
+data8 0x8000000000000000, 0x82cd8698ac2ba1d7
+data8 0x85aac367cc487b14, 0x88980e8092da8527
+data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
+data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
+data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
+data8 0x9ef5326091a111ad, 0xa27043030c496818
+data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
+data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
+data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
+data8 0xbd08a39f580c36be, 0xc12c4cca66709456
+data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
+data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
+data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
+data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
+data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
+data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
+LOCAL_OBJECT_END(T_table)
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(exp2)
+// x arrives in f8. This part screens special inputs, splits x into K, f_high, f_low, r and starts the table loads
+
+{.mfi
+ alloc r32= ar.pfs, 1, 4, 4, 0 // frame: 1 input, 4 locals, 4 outputs
+ // will continue only for non-zero normal/denormal numbers
+ fclass.nm p12, p0= f8, 0x1b
+ // GR_TBL_START= linkage-table slot holding the address of C_1...C_4 followed by T_table
+ addl GR_TBL_START= @ltoff(poly_coeffs), gp
+}
+{.mlx
+ mov GR_OF_LIMIT= 0xffff + 10 // Exponent of overflow limit (2^10)
+ movl GR_ROUNDVAL= 0x5a400000 // single-precision bits of 1.5*2^(63-10)
+}
+;;
+
+// Form special constant 1.5*2^(63-10) to give integer part and first 10
+// fractional bits of x
+{.mfi
+ setf.s FR_ROUNDVAL= GR_ROUNDVAL // Form special constant
+ fcmp.lt.s1 p6, p8= f8, f0 // X<0 ?
+ nop.i 0
+}
+{.mfb
+ ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
+ nop.f 0
+ (p12) br.cond.spnt SPECIAL_exp2 // Branch if nan, inf, zero
+}
+;;
+
+{.mlx
+ setf.exp FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
+ movl GR_UF_LIMIT= 0xc4866000 // single-precision bits of (-2^10-51) = -1075
+}
+;;
+
+{.mfi
+ ldfpd FR_COEFF3, FR_COEFF4= [ GR_COEFF_START ], 16 // load C_3, C_4
+ fma.s0 f8= f8, f1, f0 // normalize x
+ nop.i 0
+}
+;;
+
+{.mmi
+ setf.s FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
+ ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
+ mov GR_EXP_CORR= 0xffff-126 // bias - 2*63
+}
+;;
+
+{.mfi
+ ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2; pointer now at T_table
+ fma.s1 FR_KF0= f8, f1, FR_ROUNDVAL // y= x + 1.5*2^(63-10)
+ nop.i 0
+}
+;;
+
+{.mfi
+ mov GR_MASK= 1023
+ fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)
+ mov GR_MASK_low= 31
+}
+;;
+
+{.mfi
+ getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
+ fcmp.ge.s1 p12, p7= f8, FR_OF_LIMIT // x >= overflow threshold ?
+ add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
+}
+;;
+
+{.mmi
+ and GR_F_low= GR_KF0, GR_MASK_low // f_low index (low 5 bits)
+ and GR_F_high= GR_MASK, GR_KF0 // low 10 bits: f_high*32 + f_low
+ shr GR_K= GR_KF0, 10 // K
+}
+;;
+
+{.mmi
+ shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
+ add GR_BIAS= GR_K, GR_EXP_CORR // K + bias - 2*63
+ shr GR_Fh= GR_F_high, 5 // f_high
+}
+;;
+
+{.mfi
+ setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
+ fnma.s1 FR_R= FR_KF, f1, f8 // r= x - (K+f)
+ shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high}
+}
+{.mlx
+ ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low}
+ movl GR_EMIN= 0xc47f8000 // single-precision bits of EMIN= -1022
+}
+;;
+
+{.mfi
+ ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
+ (p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x= 0),
+// and |r|<2^{-11}
+// Th is a table that stores 2^fh (32 entries) rounded to
+// double extended precision (only mantissa is stored)
+// Tl is a table that stores 2^fl (32 entries) rounded to
+// double extended precision (only mantissa is stored)
+//
+// 2^x is approximated as
+// 2^K * Th [ fh ] * Tl [ fl ] * (1+c1*r+c2*r^2)
+
+// Note: We use the following trick to speed up conversion from FP to integer:
+//
+// Let x = K + r, where K is an integer, and |r| <= 0.5
+// Let N be the number of significand bits for the FP format used
+// ( N=64 for double-extended, N=53 for double)
+//
+// Then let y = 1.5 * 2^(N-1) + x for RN mode
+// K = y - 1.5 * 2^(N-1)
+// r = x - K
+//
+// If we want to obtain the integer part and the first m fractional bits of x,
+// we can use the same trick, but with a constant of 1.5 * 2^(N-1-m):
+//
+// Let x = K + f + r
+// f = 0.b_1 b_2 ... b_m
+// |r| <= 2^(-m-1)
+//
+// Then let y = 1.5 * 2^(N-1-m) + x for RN mode
+// (K+f) = y - 1.5 * 2^(N-1-m)
+// r = x - (K+f)
+
+
+// Special values
+//==============================================================
+// exp2(0)= 1
+// exp2(+inf)= inf
+// exp2(-inf)= 0
+//
+
+// Registers used
+//==============================================================
+// r2-r3, r14-r40
+// f6-f15, f32-f45
+// p6-p8, p12
+//
+
+
+GR_TBL_START = r2 // address of the linkage-table entry for poly_coeffs
+GR_LOG_TBL = r3 // pointer to the 2^{f_high} half of T_table
+
+GR_OF_LIMIT = r14 // biased exponent of the overflow threshold
+GR_UF_LIMIT = r15 // single-precision bit pattern of the underflow threshold
+GR_EXP_CORR = r16 // exponent bias - 126
+GR_F_low = r17 // f_low table index (low 5 bits of round_to_int(y))
+GR_F_high = r18 // low 10 bits of round_to_int(y), shifted down to give GR_Fh
+GR_K = r19 // integer part K
+GR_Flow_ADDR = r20 // address of the 2^{f_low} table entry
+
+GR_BIAS = r21 // K + bias - 126, exponent field for FR_2_TO_K
+GR_Fh = r22 // f_high table index
+GR_Fh_ADDR = r23 // address of the 2^{f_high} table entry
+GR_EXPMAX = r24
+GR_EMIN = r25 // single-precision bit pattern of EMIN
+
+GR_ROUNDVAL = r26 // single-precision bit pattern of 1.5*2^(63-10)
+GR_MASK = r27 // 1023, mask for the 10 fractional index bits
+GR_KF0 = r28 // significand of y = x + 1.5*2^(63-10)
+GR_MASK_low = r29 // 31, mask for the f_low index
+GR_COEFF_START = r30 // walks through poly_coeffs, then serves as T_table base
+
+GR_SAVE_B0 = r33 // r33-r36 lie in the local area of the alloc 1,4,4 frame
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37 // r37-r40 are the four output registers of that frame
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1 // f1 is used as the constant 1.0 operand throughout this file
+FR_RESULT = f8 // f8 also carries the incoming argument x
+
+
+FR_COEFF1 = f6 // C_1
+FR_COEFF2 = f7 // C_2
+FR_R = f9 // reduced argument r = x - (K+f)
+
+FR_KF0 = f12 // y = x + 1.5*2^(63-10)
+FR_UF_LIMIT = f15 // underflow threshold
+
+FR_OF_LIMIT = f32 // overflow threshold
+FR_EXPMIN = f33
+FR_ROUNDVAL = f34 // 1.5*2^(63-10)
+FR_KF = f35 // K+f
+
+FR_2_TO_K = f36 // 2^{K-126}
+FR_T_low = f37 // 2^{f_low} significand loaded with ldf8
+FR_T_high = f38
+
+FR_P12 = f41
+FR_T_low_K = f42
+FR_T = f44
+FR_P = f45
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// Two double-extended values read by ldfe, 16 bytes each; T_table follows directly
+data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1 (significand, then sign/exponent)
+data8 0xf5fdeffc162c7541, 0x00003ffc // C_2 (significand, then sign/exponent)
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+// 2^{0.00000 b6 b7 b8 b9 b10}
+data8 0x8000000000000000, 0x8016302f17467628
+data8 0x802c6436d0e04f50, 0x80429c17d77c18ed
+data8 0x8058d7d2d5e5f6b0, 0x806f17687707a7af
+data8 0x80855ad965e88b83, 0x809ba2264dada76a
+data8 0x80b1ed4fd999ab6c, 0x80c83c56b50cf77f
+data8 0x80de8f3b8b85a0af, 0x80f4e5ff089f763e
+data8 0x810b40a1d81406d4, 0x81219f24a5baa59d
+data8 0x813801881d886f7b, 0x814e67cceb90502c
+data8 0x8164d1f3bc030773, 0x817b3ffd3b2f2e47
+data8 0x8191b1ea15813bfd, 0x81a827baf7838b78
+data8 0x81bea1708dde6055, 0x81d51f0b8557ec1c
+data8 0x81eba08c8ad4536f, 0x820225f44b55b33b
+data8 0x8218af4373fc25eb, 0x822f3c7ab205c89a
+data8 0x8245cd9ab2cec048, 0x825c62a423d13f0c
+data8 0x8272fb97b2a5894c, 0x828998760d01faf3
+data8 0x82a0393fe0bb0ca8, 0x82b6ddf5dbc35906
+//
+// 2^{0.b1 b2 b3 b4 b5}
+data8 0x8000000000000000, 0x82cd8698ac2ba1d7
+data8 0x85aac367cc487b14, 0x88980e8092da8527
+data8 0x8b95c1e3ea8bd6e6, 0x8ea4398b45cd53c0
+data8 0x91c3d373ab11c336, 0x94f4efa8fef70961
+data8 0x9837f0518db8a96f, 0x9b8d39b9d54e5538
+data8 0x9ef5326091a111ad, 0xa27043030c496818
+data8 0xa5fed6a9b15138ea, 0xa9a15ab4ea7c0ef8
+data8 0xad583eea42a14ac6, 0xb123f581d2ac258f
+data8 0xb504f333f9de6484, 0xb8fbaf4762fb9ee9
+data8 0xbd08a39f580c36be, 0xc12c4cca66709456
+data8 0xc5672a115506dadd, 0xc9b9bd866e2f27a2
+data8 0xce248c151f8480e3, 0xd2a81d91f12ae45a
+data8 0xd744fccad69d6af4, 0xdbfbb797daf23755
+data8 0xe0ccdeec2a94e111, 0xe5b906e77c8348a8
+data8 0xeac0c6e7dd24392e, 0xefe4b99bdcdaf5cb
+data8 0xf5257d152486cc2c, 0xfa83b2db722a033a
+LOCAL_OBJECT_END(T_table)
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(exp2f)
+// x arrives in f8. This part screens special inputs, splits x into K, f_high, f_low, r and starts the table loads
+
+{.mfi
+ alloc r32= ar.pfs, 1, 4, 4, 0 // frame: 1 input, 4 locals, 4 outputs
+ // will continue only for non-zero normal/denormal numbers
+ fclass.nm p12, p0= f8, 0x1b
+ // GR_TBL_START= linkage-table slot holding the address of C_1...C_2 followed by T_table
+ addl GR_TBL_START= @ltoff(poly_coeffs), gp
+}
+{.mlx
+ mov GR_OF_LIMIT= 0xffff + 7 // Exponent of overflow limit (2^7)
+ movl GR_ROUNDVAL= 0x5a400000 // single-precision bits of 1.5*2^(63-10)
+}
+;;
+
+// Form special constant 1.5*2^(63-10) to give integer part and first 10
+// fractional bits of x
+{.mfi
+ setf.s FR_ROUNDVAL= GR_ROUNDVAL // Form special constant
+ fcmp.lt.s1 p6, p8= f8, f0 // X<0 ?
+ nop.i 0
+}
+{.mfb
+ ld8 GR_COEFF_START= [ GR_TBL_START ] // Load pointer to coeff table
+ nop.f 0
+ (p12) br.cond.spnt SPECIAL_exp2 // Branch if nan, inf, zero
+}
+;;
+
+{.mlx
+ setf.exp FR_OF_LIMIT= GR_OF_LIMIT // Set overflow limit
+ movl GR_UF_LIMIT= 0xc3160000 // single-precision bits of (-2^7-22) = -150
+}
+;;
+
+{.mfi
+ ldfe FR_COEFF1= [ GR_COEFF_START ], 16 // load C_1
+ fma.s0 f8= f8, f1, f0 // normalize x
+ nop.i 0
+}
+;;
+
+{.mmi
+ ldfe FR_COEFF2= [ GR_COEFF_START ], 16 // load C_2; pointer now at T_table
+ setf.s FR_UF_LIMIT= GR_UF_LIMIT // Set underflow limit
+ mov GR_EXP_CORR= 0xffff-126 // bias - 2*63
+}
+;;
+
+{.mfi
+ nop.m 0
+ fma.s1 FR_KF0= f8, f1, FR_ROUNDVAL // y= x + 1.5*2^(63-10)
+ nop.i 0
+}
+;;
+
+{.mfi
+ mov GR_MASK= 1023
+ fms.s1 FR_KF= FR_KF0, f1, FR_ROUNDVAL // (K+f)
+ mov GR_MASK_low= 31
+}
+;;
+
+{.mfi
+ getf.sig GR_KF0= FR_KF0 // (K+f)*2^10= round_to_int(y)
+ fcmp.ge.s1 p12, p7= f8, FR_OF_LIMIT // x >= overflow threshold ?
+ add GR_LOG_TBL= 256, GR_COEFF_START // Pointer to high T_table
+}
+;;
+
+{.mmi
+ and GR_F_low= GR_KF0, GR_MASK_low // f_low index (low 5 bits)
+ and GR_F_high= GR_MASK, GR_KF0 // low 10 bits: f_high*32 + f_low
+ shr GR_K= GR_KF0, 10 // K
+}
+;;
+
+{.mmi
+ shladd GR_Flow_ADDR= GR_F_low, 3, GR_COEFF_START // address of 2^{f_low}
+ add GR_BIAS= GR_K, GR_EXP_CORR // K + bias - 2*63
+ shr GR_Fh= GR_F_high, 5 // f_high
+}
+;;
+
+{.mfi
+ setf.exp FR_2_TO_K= GR_BIAS // 2^{K-126}
+ fnma.s1 FR_R= FR_KF, f1, f8 // r= x - (K+f)
+ shladd GR_Fh_ADDR= GR_Fh, 3, GR_LOG_TBL // address of 2^{f_high}
+}
+{.mlx
+ ldf8 FR_T_low= [ GR_Flow_ADDR ] // load T_low= 2^{f_low}
+ movl GR_EMIN= 0xc2fc0000 // single-precision bits of EMIN= -126
+}
+;;
+
+{.mfi
+ ldf8 FR_T_high= [ GR_Fh_ADDR ] // load T_high= 2^{f_high}
+ (p7) fcmp.lt.s1 p12, p7= f8, FR_UF_LIMIT // x= 0),
+// and |r|<2^{-8}
+// T is a table that stores 2^f (256 entries) rounded to
+// double extended precision (only mantissa is stored)
+// D stores (2^f/T [ f ] - 1), rounded to single precision
+//
+// 2^x is approximated as
+// 2^K * T [ f ] * (1+D [ f ] +c1*r+c2*r^2+...+c6*r^6)
+//
+
+
+
+// Special values
+//==============================================================
+// exp2(0)= 1
+// exp2(+inf)= inf
+// exp2(-inf)= 0
+//
+
+
+// Registers used
+//==============================================================
+// f6-f15, f32-f46
+// r2-r3, r8-r11, r14-r40
+// p6, p7, p8, p12
+
+ FR_X = f10 // same register as FR_COEFF3
+ FR_Y = f1
+ FR_RESULT = f8 // f8 also carries the incoming argument x
+
+ FR_KF0 = f6 // (K+f) scaled to an integer significand
+ FR_EXP63 = f7 // 2^{expon-63}, undoes the scaling of FR_KF0
+ FR_T = f9 // T_table entry
+ FR_COEFF3 = f10 // C_3 (same register as FR_X)
+ FR_COEFF4 = f11 // C_4
+ FR_COEFF5 = f12 // C_5
+ FR_COEFF6 = f13 // C_6
+ FR_COEFF1 = f14 // C_1
+ FR_COEFF2 = f15 // C_2
+ FR_2P14 = f32 // 2^14, upper range test
+ FR_UF_TEST = f33 // -(2^14+62), lower range test
+ FR_D = f34 // D_table entry
+ FR_R = f35 // reduced argument r
+ FR_2EXP = f36 // scale factor built from GR_K
+ FR_EMIN = f37 // EMIN = 2-2^14
+ FR_P34 = f38 // C_4*r + C_3
+ FR_P56 = f39 // C_6*r + C_5
+ FR_R2 = f40 // r^2
+ FR_P12 = f41 // C_2*r + C_1
+ FR_TS = f42 // T * scale factor
+ FR_P36 = f43 // P34 + r^2*P56
+ FR_P02 = f44 // D + r*P12
+ FR_R3 = f45 // r^3
+ FR_P06 = f46 // P02 + r^3*P36
+
+
+ GR_ADDR0 = r2 // linkage-table slot for poly_coeffs
+ GR_ADDR = r2 // loaded table pointer (reuses r2)
+ GR_D_ADDR0 = r3 // linkage-table slot for D_table
+ GR_D_ADDR = r3 // loaded D_table pointer (reuses r3)
+ GR_LEADBITS = r8 // integer part and first 8 fraction bits of |x|
+ GR_256 = r9
+ GR_EM63 = r10 // getf.exp value of x minus 63
+ GR_255 = r11
+ GR_EXPON = r14 // shift count: bias+63-8 minus the biased exponent
+ GR_BM63 = r15 // bias-63
+ GR_UF_TEST = r16 // single-precision bits of -(2^14+62)
+ GR_INDEX = r17 // 8-bit table index
+ GR_K = r18 // integer part K, later the scale-factor exponent
+ GR_KF = r19 // GR_LEADBITS shifted back into significand position
+ GR_2P14 = r19 // single-precision bits of 2^14 (reuses r19 before GR_KF is written)
+ GR_EMIN = r20
+ GR_IT = r21 // address of the T_table entry
+ GR_ID = r22 // address of the D_table entry
+ GR_63 = r23
+ GR_CONST1 = r24 // bias+15
+ GR_EBIAS = r25 // getf.exp result: sign and biased exponent of x
+ GR_CONST2 = r26 // bias+63-8
+ GR_CONST3 = r27 // 0x1ffff exponent mask
+ GR_SIGNIF = r28 // significand of x
+ GR_ARGEXP = r29 // biased exponent of x
+ GR_SGN = r30 // sign bit of x (nonzero when negative)
+ GR_EMIN1 = r31 // GR_EMIN shifted left 11: single-precision bits of EMIN
+ GR_SREG = r32 // receives ar.pfs from alloc
+
+ GR_SAVE_B0 = r33 // r33-r36 lie in the local area of the alloc 1,4,4 frame
+ GR_SAVE_PFS = r34
+ GR_SAVE_GP = r35
+ GR_SAVE_SP = r36
+
+ GR_Parameter_X = r37 // r37-r40 are the four output registers of that frame
+ GR_Parameter_Y = r38
+ GR_Parameter_RESULT= r39
+ GR_Parameter_TAG = r40
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// Laid out in load order: two ldfpd pairs, then two ldfe values, 16 bytes each; T_table follows
+ data8 0x3fac6b08d704a0c0 // C_3 (double)
+ data8 0x3f83b2ab6fba4e77 // C_4 (double)
+ data8 0x3f55d87fe78a6731 // C_5 (double)
+ data8 0x3f2430912f86c787 // C_6 (double)
+ data8 0xb17217f7d1cf79ab, 0x00003ffe // C_1 (double-extended: significand, then sign/exponent)
+ data8 0xf5fdeffc162c7541, 0x00003ffc // C_2 (double-extended: significand, then sign/exponent)
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+ data8 0x8000000000000000, 0x8058d7d2d5e5f6b1
+ data8 0x80b1ed4fd999ab6c, 0x810b40a1d81406d4
+ data8 0x8164d1f3bc030773, 0x81bea1708dde6056
+ data8 0x8218af4373fc25ec, 0x8272fb97b2a5894c
+ data8 0x82cd8698ac2ba1d7, 0x83285071e0fc4547
+ data8 0x8383594eefb6ee37, 0x83dea15b9541b132
+ data8 0x843a28c3acde4046, 0x8495efb3303efd30
+ data8 0x84f1f656379c1a29, 0x854e3cd8f9c8c95d
+ data8 0x85aac367cc487b15, 0x86078a2f23642a9f
+ data8 0x8664915b923fba04, 0x86c1d919caef5c88
+ data8 0x871f61969e8d1010, 0x877d2afefd4e256c
+ data8 0x87db357ff698d792, 0x88398146b919f1d4
+ data8 0x88980e8092da8527, 0x88f6dd5af155ac6b
+ data8 0x8955ee03618e5fdd, 0x89b540a7902557a4
+ data8 0x8a14d575496efd9a, 0x8a74ac9a79896e47
+ data8 0x8ad4c6452c728924, 0x8b3522a38e1e1032
+ data8 0x8b95c1e3ea8bd6e7, 0x8bf6a434adde0085
+ data8 0x8c57c9c4646f4dde, 0x8cb932c1bae97a95
+ data8 0x8d1adf5b7e5ba9e6, 0x8d7ccfc09c50e2f8
+ data8 0x8ddf042022e69cd6, 0x8e417ca940e35a01
+ data8 0x8ea4398b45cd53c0, 0x8f073af5a2013520
+ data8 0x8f6a8117e6c8e5c4, 0x8fce0c21c6726481
+ data8 0x9031dc431466b1dc, 0x9095f1abc540ca6b
+ data8 0x90fa4c8beee4b12b, 0x915eed13c89689d3
+ data8 0x91c3d373ab11c336, 0x9228ffdc10a051ad
+ data8 0x928e727d9531f9ac, 0x92f42b88f673aa7c
+ data8 0x935a2b2f13e6e92c, 0x93c071a0eef94bc1
+ data8 0x9426ff0fab1c04b6, 0x948dd3ac8ddb7ed3
+ data8 0x94f4efa8fef70961, 0x955c5336887894d5
+ data8 0x95c3fe86d6cc7fef, 0x962bf1cbb8d97560
+ data8 0x96942d3720185a00, 0x96fcb0fb20ac4ba3
+ data8 0x97657d49f17ab08e, 0x97ce9255ec4357ab
+ data8 0x9837f0518db8a96f, 0x98a1976f7597e996
+ data8 0x990b87e266c189aa, 0x9975c1dd47518c77
+ data8 0x99e0459320b7fa65, 0x9a4b13371fd166ca
+ data8 0x9ab62afc94ff864a, 0x9b218d16f441d63d
+ data8 0x9b8d39b9d54e5539, 0x9bf93118f3aa4cc1
+ data8 0x9c6573682ec32c2d, 0x9cd200db8a0774cb
+ data8 0x9d3ed9a72cffb751, 0x9dabfdff6367a2aa
+ data8 0x9e196e189d472420, 0x9e872a276f0b98ff
+ data8 0x9ef5326091a111ae, 0x9f6386f8e28ba651
+ data8 0x9fd228256400dd06, 0xa041161b3d0121be
+ data8 0xa0b0510fb9714fc2, 0xa11fd9384a344cf7
+ data8 0xa18faeca8544b6e4, 0xa1ffd1fc25cea188
+ data8 0xa27043030c496819, 0xa2e102153e918f9e
+ data8 0xa3520f68e802bb93, 0xa3c36b345991b47c
+ data8 0xa43515ae09e6809e, 0xa4a70f0c95768ec5
+ data8 0xa5195786be9ef339, 0xa58bef536dbeb6ee
+ data8 0xa5fed6a9b15138ea, 0xa6720dc0be08a20c
+ data8 0xa6e594cfeee86b1e, 0xa7596c0ec55ff55b
+ data8 0xa7cd93b4e965356a, 0xa8420bfa298f70d1
+ data8 0xa8b6d5167b320e09, 0xa92bef41fa77771b
+ data8 0xa9a15ab4ea7c0ef8, 0xaa1717a7b5693979
+ data8 0xaa8d2652ec907629, 0xab0386ef48868de1
+ data8 0xab7a39b5a93ed337, 0xabf13edf162675e9
+ data8 0xac6896a4be3fe929, 0xace0413ff83e5d04
+ data8 0xad583eea42a14ac6, 0xadd08fdd43d01491
+ data8 0xae493452ca35b80e, 0xaec22c84cc5c9465
+ data8 0xaf3b78ad690a4375, 0xafb51906e75b8661
+ data8 0xb02f0dcbb6e04584, 0xb0a957366fb7a3c9
+ data8 0xb123f581d2ac2590, 0xb19ee8e8c94feb09
+ data8 0xb21a31a66618fe3b, 0xb295cff5e47db4a4
+ data8 0xb311c412a9112489, 0xb38e0e38419fae18
+ data8 0xb40aaea2654b9841, 0xb487a58cf4a9c180
+ data8 0xb504f333f9de6484, 0xb58297d3a8b9f0d2
+ data8 0xb60093a85ed5f76c, 0xb67ee6eea3b22b8f
+ data8 0xb6fd91e328d17791, 0xb77c94c2c9d725e9
+ data8 0xb7fbefca8ca41e7c, 0xb87ba337a1743834
+ data8 0xb8fbaf4762fb9ee9, 0xb97c143756844dbf
+ data8 0xb9fcd2452c0b9deb, 0xba7de9aebe5fea09
+ data8 0xbaff5ab2133e45fb, 0xbb81258d5b704b6f
+ data8 0xbc034a7ef2e9fb0d, 0xbc85c9c560e7b269
+ data8 0xbd08a39f580c36bf, 0xbd8bd84bb67ed483
+ data8 0xbe0f6809860993e2, 0xbe935317fc378238
+ data8 0xbf1799b67a731083, 0xbf9c3c248e2486f8
+ data8 0xc0213aa1f0d08db0, 0xc0a6956e8836ca8d
+ data8 0xc12c4cca66709456, 0xc1b260f5ca0fbb33
+ data8 0xc238d2311e3d6673, 0xc2bfa0bcfad907c9
+ data8 0xc346ccda24976407, 0xc3ce56c98d21b15d
+ data8 0xc4563ecc5334cb33, 0xc4de8523c2c07baa
+ data8 0xc5672a115506dadd, 0xc5f02dd6b0bbc3d9
+ data8 0xc67990b5aa245f79, 0xc70352f04336c51e
+ data8 0xc78d74c8abb9b15d, 0xc817f681416452b2
+ data8 0xc8a2d85c8ffe2c45, 0xc92e1a9d517f0ecc
+ data8 0xc9b9bd866e2f27a3, 0xca45c15afcc72624
+ data8 0xcad2265e4290774e, 0xcb5eecd3b38597c9
+ data8 0xcbec14fef2727c5d, 0xcc799f23d11510e5
+ data8 0xcd078b86503dcdd2, 0xcd95da6a9ff06445
+ data8 0xce248c151f8480e4, 0xceb3a0ca5dc6a55d
+ data8 0xcf4318cf191918c1, 0xcfd2f4683f94eeb5
+ data8 0xd06333daef2b2595, 0xd0f3d76c75c5db8d
+ data8 0xd184df6251699ac6, 0xd2164c023056bcab
+ data8 0xd2a81d91f12ae45a, 0xd33a5457a3029054
+ data8 0xd3ccf099859ac379, 0xd45ff29e0972c561
+ data8 0xd4f35aabcfedfa1f, 0xd5872909ab75d18a
+ data8 0xd61b5dfe9f9bce07, 0xd6aff9d1e13ba2fe
+ data8 0xd744fccad69d6af4, 0xd7da67311797f56a
+ data8 0xd870394c6db32c84, 0xd9067364d44a929c
+ data8 0xd99d15c278afd7b6, 0xda3420adba4d8704
+ data8 0xdacb946f2ac9cc72, 0xdb63714f8e295255
+ data8 0xdbfbb797daf23755, 0xdc9467913a4f1c92
+ data8 0xdd2d818508324c20, 0xddc705bcd378f7f0
+ data8 0xde60f4825e0e9124, 0xdefb4e1f9d1037f2
+ data8 0xdf9612deb8f04420, 0xe031430a0d99e627
+ data8 0xe0ccdeec2a94e111, 0xe168e6cfd3295d23
+ data8 0xe2055afffe83d369, 0xe2a23bc7d7d91226
+ data8 0xe33f8972be8a5a51, 0xe3dd444c46499619
+ data8 0xe47b6ca0373da88d, 0xe51a02ba8e26d681
+ data8 0xe5b906e77c8348a8, 0xe658797368b3a717
+ data8 0xe6f85aaaee1fce22, 0xe798aadadd5b9cbf
+ data8 0xe8396a503c4bdc68, 0xe8da9958464b42ab
+ data8 0xe97c38406c4f8c57, 0xea1e4756550eb27b
+ data8 0xeac0c6e7dd24392f, 0xeb63b74317369840
+ data8 0xec0718b64c1cbddc, 0xecaaeb8ffb03ab41
+ data8 0xed4f301ed9942b84, 0xedf3e6b1d418a491
+ data8 0xee990f980da3025b, 0xef3eab20e032bc6b
+ data8 0xefe4b99bdcdaf5cb, 0xf08b3b58cbe8b76a
+ data8 0xf13230a7ad094509, 0xf1d999d8b7708cc1
+ data8 0xf281773c59ffb13a, 0xf329c9233b6bae9c
+ data8 0xf3d28fde3a641a5b, 0xf47bcbbe6db9fddf
+ data8 0xf5257d152486cc2c, 0xf5cfa433e6537290
+ data8 0xf67a416c733f846e, 0xf7255510c4288239
+ data8 0xf7d0df730ad13bb9, 0xf87ce0e5b2094d9c
+ data8 0xf92959bb5dd4ba74, 0xf9d64a46eb939f35
+ data8 0xfa83b2db722a033a, 0xfb3193cc4227c3f4
+ data8 0xfbdfed6ce5f09c49, 0xfc8ec01121e447bb
+ data8 0xfd3e0c0cf486c175, 0xfdedd1b496a89f35
+ data8 0xfe9e115c7b8f884c, 0xff4ecb59511ec8a5
+LOCAL_OBJECT_END(T_table)
+
+
+LOCAL_OBJECT_START(D_table)
+
+ data4 0x00000000, 0x9f55c08f, 0x1e93ffa3, 0x1dcd43a8
+ data4 0x1f751f79, 0x9f3cdd88, 0x9f43d155, 0x1eda222c
+ data4 0x1ef35513, 0x9f597895, 0x9e698881, 0x1ec71073
+ data4 0x1e50e371, 0x9dc01e19, 0x1de74133, 0x1e2f028c
+ data4 0x9edefb47, 0x1ebbac48, 0x9e8b0330, 0x9e9e9314
+ data4 0x1edc1d11, 0x1f098529, 0x9f52827c, 0x1f50050d
+ data4 0x1f301e8e, 0x1f5b64d1, 0x9f45e3ee, 0x9ef64d6d
+ data4 0x1d6ec5e8, 0x9e61ad9a, 0x1d44ccbb, 0x9e4a8bbb
+ data4 0x9cf11576, 0x9dcce7e7, 0x9d02ac90, 0x1f26ccf0
+ data4 0x9f0877c6, 0x9ddd62ae, 0x9f4b7fc3, 0x1ea8ef6b
+ data4 0x1ea4378d, 0x1ef6fc38, 0x1db99fd9, 0x1f22bf6f
+ data4 0x1f53e172, 0x1e85504a, 0x9f37cc75, 0x1f0c5e17
+ data4 0x1dde8aac, 0x9cb42bb2, 0x1e153cd7, 0x1eb62bba
+ data4 0x9e9b941b, 0x9ea80e3c, 0x1f508823, 0x1ec3fd36
+ data4 0x1e9ffaa1, 0x1e21e2eb, 0x9d948b1d, 0x9e8ac93a
+ data4 0x1ef7ee6f, 0x9e80dda3, 0x1f0814be, 0x1dc5ddfe
+ data4 0x1eedb9d1, 0x9f2aaa26, 0x9ea5b0fc, 0x1edf702e
+ data4 0x9e391201, 0x1f1316bb, 0x1ea27fb7, 0x9e05ed18
+ data4 0x9f199ed2, 0x1ee7fd7c, 0x1f003db6, 0x9eac3793
+ data4 0x9e5b8c10, 0x9f3af17c, 0x1bc9a8be, 0x1ee3c004
+ data4 0x9f19b1b2, 0x9f242ce9, 0x9ce67dd1, 0x9e4f6275
+ data4 0x1e20742c, 0x1eb9328a, 0x9f477153, 0x1d969718
+ data4 0x9f1e6c43, 0x1f2f67f4, 0x9f39c7e4, 0x9e3c4feb
+ data4 0x1da3956b, 0x9e7c685d, 0x1f280911, 0x9f0d8afb
+ data4 0x1e314b40, 0x9eb4f250, 0x9f1a34ad, 0x1ef5d5e7
+ data4 0x9f145496, 0x1e604827, 0x9f1e5195, 0x1e9c1fc0
+ data4 0x1efde521, 0x1e69b385, 0x1f316830, 0x9f244eae
+ data4 0x1f1787ec, 0x9e939971, 0x1f0bb393, 0x9f0511d6
+ data4 0x1ed919de, 0x1d8b7b28, 0x1e5ca4a9, 0x1e7c357b
+ data4 0x9e3ff8e8, 0x1eef53b5, 0x9ed22ed7, 0x1f16659b
+ data4 0x9f2db102, 0x9e2c6a78, 0x1f328d7d, 0x9f2fec3c
+ data4 0x1eb395bd, 0x9f242b84, 0x9e2683e6, 0x1ed71e68
+ data4 0x1efd1df5, 0x9e9eeafd, 0x9ed2249c, 0x1eef129a
+ data4 0x1d1ea44c, 0x9e81f7ff, 0x1eaf77c9, 0x9ee7a285
+ data4 0x1e1864ed, 0x9ee7edbb, 0x9e15a27d, 0x9ae61655
+ data4 0x1f1ff1a2, 0x1da29755, 0x9e5f46fb, 0x1e901236
+ data4 0x9eecfb9b, 0x9f204d2f, 0x1ec64685, 0x9eb809bd
+ data4 0x9e0026c5, 0x1d9f1da1, 0x1f142b49, 0x9f20f22e
+ data4 0x1f24b067, 0x1f185a4c, 0x9f09765c, 0x9ece902f
+ data4 0x1e2ca5db, 0x1e6de464, 0x9f071f67, 0x1f1518c3
+ data4 0x1ea13ded, 0x1f0b8414, 0x1edb6ad4, 0x9e548740
+ data4 0x9ea10efb, 0x1ee48a60, 0x1e7954c5, 0x9edad013
+ data4 0x9f21517d, 0x9e9b6e0c, 0x9ee7f9a6, 0x9ebd4298
+ data4 0x9d65b24e, 0x1eed751f, 0x9f1573ea, 0x9d430377
+ data4 0x9e13fc0c, 0x1e47008a, 0x1e3d5c1d, 0x1ef41a91
+ data4 0x9e4a4ef7, 0x9e952f18, 0x1d620566, 0x1d9b8d33
+ data4 0x1db06247, 0x1e94b31e, 0x1f0730ad, 0x9d79ffb4
+ data4 0x1ed64d51, 0x9e91fd11, 0x9e28d35a, 0x9dea0ed9
+ data4 0x1e891def, 0x9ee28ac0, 0x1e1db99b, 0x9ee1ce38
+ data4 0x9bdd9bca, 0x1eb72cb9, 0x9e8c53c6, 0x1e0df6ca
+ data4 0x1e8f2ccd, 0x9e9b0886, 0x1eeb3bc7, 0x1ec7e772
+ data4 0x9e210776, 0x9daf246c, 0x1ea1f151, 0x1ece4dc6
+ data4 0x1ce741c8, 0x1ed3c88f, 0x9ec9a4fd, 0x9e0c8d30
+ data4 0x1d2fbb26, 0x9ef212a7, 0x1ee44f1c, 0x9e445550
+ data4 0x1e075f77, 0x9d9291a3, 0x1f09c2ee, 0x9e012c88
+ data4 0x1f057d62, 0x9e7bb0dc, 0x9d8758ee, 0x1ee8d6c1
+ data4 0x9e509a57, 0x9e4ca7b7, 0x1e2cb341, 0x9ec35106
+ data4 0x1ecf3baf, 0x1e11781c, 0x1ea0cc78, 0x1eb75ca6
+ data4 0x1e961e1a, 0x1eb88853, 0x1e7abf50, 0x1ee38704
+ data4 0x9dc5ab0f, 0x1afe197b, 0x9ec07523, 0x9d9b7f78
+ data4 0x1f011618, 0x1ed43b0b, 0x9f035945, 0x9e3fd014
+ data4 0x9bbda5cd, 0x9e83f8ab, 0x1e58a928, 0x1e392d61
+ data4 0x1efdbb52, 0x1ee310a8, 0x9ec7ecc1, 0x1e8c9ed6
+ data4 0x9ef82dee, 0x9e70545b, 0x9ea53fc4, 0x1e40f419
+LOCAL_OBJECT_END(D_table)
+
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(exp2l)
+// x arrives in f8. This part does the integer argument reduction, range checks and the degree-6 polynomial
+{.mii
+ // get sign and biased exponent of x
+ getf.exp GR_EBIAS = f8
+ // GR_D_ADDR0 = linkage-table slot for D_table
+ addl GR_D_ADDR0 = @ltoff(D_table), gp
+ // GR_ADDR0 = linkage-table slot for C_1...C_6 followed by T_table
+ addl GR_ADDR0 = @ltoff(poly_coeffs), gp ;;
+}
+
+{.mfi
+ // get significand
+ getf.sig GR_SIGNIF = f8
+ // will continue only for normal/denormal numbers
+ fclass.nm.unc p12, p7 = f8, 0x1b
+ mov GR_63 = 63 ;;
+}
+
+{.mfi
+ nop.m 0
+ nop.f 0
+ // GR_CONST2 = bias+63-8
+ mov GR_CONST2 = 0xffff+55
+}
+{.mfi
+ // GR_CONST1 = bias+15
+ mov GR_CONST1 = 0xffff+15
+ nop.f 0
+ // GR_CONST3 = mask for the 17-bit exponent field
+ mov GR_CONST3 = 0x1ffff ;;
+}
+
+{.mfi
+ // load start address for C_1...C_6 followed by T_table
+ ld8 GR_ADDR = [ GR_ADDR0 ]
+ nop.f 0
+ // get sign of argument
+ andcm GR_SGN = GR_EBIAS, GR_CONST3
+}
+{.mfi
+ // GR_D_ADDR = pointer to D_table
+ ld8 GR_D_ADDR = [ GR_D_ADDR0 ]
+ nop.f 0
+ // get argument exponent
+ and GR_ARGEXP = GR_CONST3, GR_EBIAS ;;
+}
+
+{.mfi
+ // frame: 1 input, 4 locals, 4 outputs
+ alloc GR_SREG = ar.pfs, 1, 4, 4, 0
+ nop.f 0
+ // p6 = 1 if sign = 1
+ cmp.ne p6, p8 = GR_SGN, r0
+}
+{.mfi
+ // p7 = 1 if exponent >= 15 (argument out of range)
+ cmp.ge p7, p0 = GR_ARGEXP, GR_CONST1
+ nop.f 0
+ // GR_EXPON = shift count 63-8-exponent
+ sub GR_EXPON = GR_CONST2, GR_ARGEXP ;;
+}
+
+{.mib
+ // load C_3, C_4
+ ldfpd FR_COEFF3, FR_COEFF4 = [ GR_ADDR ], 16
+ // get integer part and first 8 fraction bits of |x|
+ shr.u GR_LEADBITS = GR_SIGNIF, GR_EXPON
+ (p12) br.cond.spnt SPECIAL_exp2l
+}
+{.mib
+ mov GR_256 = 256
+ // GR_EM63 = (sign and biased exponent) - 63
+ sub GR_EM63 = GR_EBIAS, GR_63
+ (p7) br.cond.spnt OUT_RANGE_exp2l ;;
+}
+
+{.mlx
+ // load C_5, C_6
+ ldfpd FR_COEFF5, FR_COEFF6 = [ GR_ADDR ], 16
+ // GR_2P14 = single-precision bits of 2^14
+ movl GR_2P14 = 0x46800000 ;;
+}
+
+{.mfi
+ // load C_1
+ ldfe FR_COEFF1 = [ GR_ADDR ], 16
+ fma.s0 f8 = f8, f1, f0
+ // GR_BM63 = bias-63
+ mov GR_BM63 = 0xffff-63 ;;
+}
+
+{.mlx
+ setf.s FR_2P14 = GR_2P14
+ // GR_UF_TEST = single-precision bits of -2^14-62
+ movl GR_UF_TEST = 0xc6807c00
+}
+{.mfi
+ // load C_2; GR_ADDR now points at T_table
+ ldfe FR_COEFF2 = [ GR_ADDR ], 16
+ nop.f 0
+ mov GR_255 = 255 ;;
+}
+
+{.mib
+ // get 8-bit index
+ and GR_INDEX = GR_255, GR_LEADBITS
+ // get K = integer part
+ shr.u GR_K = GR_LEADBITS, 8
+ nop.b 0 ;;
+}
+
+{.mmi
+ // if sign = 1 && f>0, set p7 = 1
+ (p6) cmp.gt.unc p7, p0 = GR_INDEX, r0
+ setf.s FR_UF_TEST = GR_UF_TEST
+ // GR_KF = leading bits moved back to significand position
+ shl GR_KF = GR_LEADBITS, GR_EXPON ;;
+}
+
+{.mfi
+ // if sign = 1 && f>0, set f = 1-f
+ (p7) sub GR_INDEX = GR_256, GR_INDEX
+ nop.f 0
+ // if sign = 1 && f>0, set K = K+1
+ (p7) add GR_K = GR_K, r0, 1 ;;
+}
+
+{.mfi
+ // FR_EXP63 = 2^{expon-63}, carrying the sign bit of x from GR_EBIAS
+ setf.exp FR_EXP63 = GR_EM63
+ nop.f 0
+ nop.i 0 ;;
+}
+
+.pred.rel "mutex", p6, p8
+{.mfi
+ // if sign = 0, set scale factor exponent S = K+bias-63
+ (p8) add GR_K = GR_K, GR_BM63
+ nop.f 0
+ // if sign = 1, set scale factor exponent S = -K+bias-63
+ (p6) sub GR_K = GR_BM63, GR_K ;;
+}
+
+{.mmi
+ // FR_KF0 = 2^{63-expon}*(K+f)
+ setf.sig FR_KF0 = GR_KF
+ nop.m 0
+ // GR_EMIN = 0xc67ff800 >> 11; shifted left 11 below to form EMIN = 2-2^14
+ mov GR_EMIN = 0x18cfff ;;
+}
+
+{.mfi
+ // get T_table entry address (8 bytes per entry)
+ shladd GR_IT = GR_INDEX, 3, GR_ADDR
+ // p7 = 1 if x >= 2^14
+ fcmp.ge.s1 p7, p12 = f8, FR_2P14
+ // get D_table entry address (4 bytes per entry)
+ shladd GR_ID = GR_INDEX, 2, GR_D_ADDR ;;
+}
+
+{.mfi
+ // load T_table value
+ ldf8 FR_T = [ GR_IT ]
+ // p7 = 1 if x is below -(2^14+62)
+ (p12) fcmp.lt.s1 p7, p0 = f8, FR_UF_TEST
+ // GR_EMIN1 = single-precision bits of EMIN = 2-2^14
+ shl GR_EMIN1 = GR_EMIN, 11 ;;
+}
+
+{.mmb
+ // FR_2EXP = scale factor = 2^{S-bias}, S as set in GR_K above
+ setf.exp FR_2EXP = GR_K
+ // load D_table value
+ ldfs FR_D = [ GR_ID ]
+ (p7) br.cond.spnt OUT_RANGE_exp2l ;;
+}
+
+{.mfi
+ nop.m 0
+ // get r = x-(K+f)
+ fnma.s1 FR_R = FR_KF0, FR_EXP63, f8
+ nop.i 0 ;;
+}
+
+{.mfi
+ // FR_EMIN = EMIN
+ setf.s FR_EMIN = GR_EMIN1
+ // P34 = C_4*r+C_3
+ fma.s1 FR_P34 = FR_COEFF4, FR_R, FR_COEFF3
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P56 = C_6*r+C_5
+ fma.s1 FR_P56 = FR_COEFF6, FR_R, FR_COEFF5
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // r2 = r*r
+ fma.s1 FR_R2 = FR_R, FR_R, f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P12 = C_2*r+C_1
+ fma.s1 FR_P12 = FR_COEFF2, FR_R, FR_COEFF1
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // TS = T * scale factor
+ fma.s1 FR_TS = FR_T, FR_2EXP, f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P36 = P34+r2*P56
+ fma.s1 FR_P36 = FR_P56, FR_R2, FR_P34
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // P02 = D+r*P12
+ fma.s1 FR_P02 = FR_P12, FR_R, FR_D
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r3 = r*r2
+ fma.s1 FR_R3 = FR_R2, FR_R, f0
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // P06 = P02+r3*P36
+ fma.s1 FR_P06 = FR_P36, FR_R3, FR_P02
+ nop.i 0 ;;
+}
+
+{.mfi
+ nop.m 0
+ // underflow (x multiplication on 2^n can be
+// performed by doing logical "or" operation with bits presenting 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
+//
+
+// The final result is reconstructed as follows
+// exp(x) = T + T*P
+
+// Special values
+//*********************************************************************
+// expf(+0) = 1.0
+// expf(-0) = 1.0
+
+// expf(+qnan) = +qnan
+// expf(-qnan) = -qnan
+// expf(+snan) = +qnan
+// expf(-snan) = -qnan
+
+// expf(-inf) = +0
+// expf(+inf) = +inf
+
+// Overflow and Underflow
+//*********************************************************************
+// expf(x) = largest single normal when
+// x = 88.72283 = 0x42b17217
+
+// expf(x) = smallest single normal when
+// x = -87.33654 = 0xc2aeac4f
+
+// expf(x) = largest round-to-nearest single zero when
+// x = -103.97208 = 0xc2cff1b5
+
+
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8, input
+// f6,f7, f9 -> f15, f32 -> f40
+
+// General registers used:
+// r3, r23 -> r38
+
+// Predicate registers used:
+// p6, p7, p10 -> p15
+
+// Assembly macros
+//*********************************************************************
+// integer registers used (several symbols deliberately share one register)
+// scratch
+rNJ = r3 // significand bits of fNint: j in bits 0-5, n above
+
+rTmp = r23 // shares r23 with rJ
+rJ = r23 // j, then the table entry address, then the entry itself
+rN = r24
+rTblAddr = r25
+rA3 = r26
+rExpHalf = r27
+rLn2Div64 = r28
+r17ones_m1 = r29 // shares r29 with rGt_ln
+rGt_ln = r29
+rRightShifter = r30
+r64DivLn2 = r31
+// stacked
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10 // shares f10 with fNint
+FR_Y = f1 // expf has one argument; f1 is stored as Parameter 2 placeholder
+FR_RESULT = f8
+// scratch
+fRightShifter = f6
+f64DivLn2 = f7
+fNormX = f9
+fNint = f10
+fN = f11
+fR = f12
+fLn2Div64 = f13
+fA2 = f14
+fA3 = f15
+// stacked
+fP = f32
+fT = f33
+fMIN_SGL_OFLOW_ARG = f34
+fMAX_SGL_ZERO_ARG = f35
+fMAX_SGL_NORM_ARG = f36
+fMIN_SGL_NORM_ARG = f37
+fRSqr = f38
+fTmp = f39 // shares f39 with fGt_pln
+fGt_pln = f39
+fWre_urm_f8 = f40 // shares f40 with fFtz_urm_f8
+fFtz_urm_f8 = f40
+
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(_expf_table)
+data4 0x42b17218 // Smallest sgl arg to overflow sgl result, +88.7228
+data4 0xc2cff1b5 // Largest sgl for rnd-to-nearest 0 result, -103.9720
+data4 0x42b17217 // Largest sgl arg to give normal sgl result, +88.7228
+data4 0xc2aeac4f // Smallest sgl arg to give normal sgl result, -87.3365
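+// 2^(j/64) table, j goes from 0 to 63. Each entry holds only the DP
+// fraction bits of 2^(j/64); the code ORs in the biased exponent of 2^n.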
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_expf_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(expf)
+// Input x arrives in f8; the result expf(x) is returned in f8.
+{ .mlx
+ addl rTblAddr = @ltoff(_expf_table),gp
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
+}
+{ .mlx
+ addl rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
+}
+;;
+
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p14, p0 = f8, 0x22 // test for -INF
+ shl rA3 = rA3, 12 // 0x3E2AA000, approx to 1.0/6.0 in SP
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ addl rExpHalf = 0xFFFE, r0 // exponent of 1/2
+}
+;;
+
+{ .mfi
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p15, p0 = f8, 0x1e1 // test for NaT,NaN,+Inf
+ nop.i 0
+}
+{ .mlx
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ nop.i 0
+}
+{ .mfb
+ setf.s fA3 = rA3 // load A3 to FP reg
+(p14) fma.s.s0 f8 = f0, f1, f0 // result if x = -inf
+(p14) br.ret.spnt b0 // exit here if x = -inf
+}
+;;
+
+{ .mfi
+ setf.exp fA2 = rExpHalf // load A2 to FP reg
+ fcmp.eq.s0 p6, p0 = f8, f0 // Dummy to flag denorm
+ nop.i 0
+}
+{ .mfb
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+(p15) fma.s.s0 f8 = f8, f1, f0 // result if x = NaT,NaN,+Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,+Inf
+}
+;;
+
+{ .mfb
+ // overflow and underflow_zero threshold
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_ZERO_ARG = [rTblAddr], 8
+(p13) fma.s.s0 f8 = f1, f1, f0 // result if x = 0.0
+(p13) br.ret.spnt b0 // exit here if x =0.0
+}
+;;
+
+ // max normal and underflow_denorm threshold; leaves rTblAddr at the 2^(j/64) table
+{ .mfi
+ ldfps fMAX_SGL_NORM_ARG, fMIN_SGL_NORM_ARG = [rTblAddr], 8
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ nop.i 0
+}
+;;
+
+// Divide arguments into the following categories:
+// Certain Underflow p11 - -inf < x <= MAX_SGL_ZERO_ARG
+// Possible Underflow p13 - MAX_SGL_ZERO_ARG < x < MIN_SGL_NORM_ARG
+// Certain Safe - MIN_SGL_NORM_ARG <= x <= MAX_SGL_NORM_ARG
+// Possible Overflow p14 - MAX_SGL_NORM_ARG < x < MIN_SGL_OFLOW_ARG
+// Certain Overflow p15 - MIN_SGL_OFLOW_ARG <= x < +inf
+//
+// If the input is really a single arg, then there will never be
+// "Possible Overflow" arguments.
+//
+
+{ .mfi
+ nop.m 0
+ // check for overflow
+ fcmp.ge.s1 p15, p0 = fNormX, fMIN_SGL_OFLOW_ARG
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // check for underflow and tiny (+0) result
+ fcmp.le.s1 p11, p0 = fNormX, fMAX_SGL_ZERO_ARG
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ // branch out if overflow
+(p15) br.cond.spnt EXP_CERTAIN_OVERFLOW
+}
+;;
+
+{ .mfb
+ getf.sig rNJ = fNint // bits of n, j
+ // check for underflow and denormal result
+ fcmp.lt.s1 p13, p0 = fNormX, fMIN_SGL_NORM_ARG
+ // branch out if underflow and tiny (+0) result
+(p11) br.cond.spnt EXP_CERTAIN_UNDERFLOW
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // check for possible overflow
+ fcmp.gt.s1 p14, p0 = fNormX, fMAX_SGL_NORM_ARG
+ extr.u rJ = rNJ, 0, 6 // bits of j
+}
+{ .mfi
+ addl rN = 0xFFFF - 63, rNJ // biased and shifted n (0xFFFF - 63 = 0x3FF << 6)
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ nop.i 0
+}
+;;
+
+{ .mfi
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ nop.f 0
+ shr rN = rN, 6 // biased n
+}
+;;
+
+{ .mfi
+ ld8 rJ = [rJ] // table entry for 2^(j/64)
+ nop.f 0
+ shl rN = rN, 52 // 2^n bits in DP format
+}
+;;
+
+{ .mfi
+ or rN = rN, rJ // bits of 2^n * 2^(j/64) in DP format
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.d fT = rN // 2^n * 2^(j/64)
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
+ nop.i 0
+}
+;;
+
+{ .mbb
+ nop.m 0
+ // branch out if possible underflow
+(p13) br.cond.spnt EXP_POSSIBLE_UNDERFLOW
+ // branch out if possible overflow result
+(p14) br.cond.spnt EXP_POSSIBLE_OVERFLOW
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // final result in the absence of over- and underflow: T + T*P
+ fma.s.s0 f8 = fP, fT, fT
+ // exit here in the absence of over- and underflow
+ br.ret.sptk b0
+}
+;;
+
+EXP_POSSIBLE_OVERFLOW:
+
+// Here if fMAX_SGL_NORM_ARG < x < fMIN_SGL_OFLOW_ARG
+// This cannot happen if input is a single, only if input has higher precision.
+// Overflow is a possibility, not a certainty.
+
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
+
+{ .mfi
+ mov rGt_ln = 0x1007f // Exponent for largest single + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fP, fT, fT // Result with wre set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt EXP_CERTAIN_OVERFLOW // Branch if overflow
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT // Final result T + T*P, same as the safe path
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+// here if overflow (x >= fMIN_SGL_OFLOW_ARG, or the wre recomputation overflowed)
+EXP_CERTAIN_OVERFLOW:
+{ .mmi
+ addl r17ones_m1 = 0x1FFFE, r0 // 17-bit all-ones exponent minus 1
+;;
+ setf.exp fTmp = r17ones_m1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ alloc r32=ar.pfs,0,3,4,0
+ fmerge.s FR_X = f8,f8 // keep x for the error handler; f8 is overwritten below
+ nop.i 0
+}
+{ .mfb
+ mov GR_Parameter_TAG = 16 // tag value handed to the error region for overflow
+ fma.s.s0 FR_RESULT = fTmp, fTmp, fTmp // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
+}
+;;
+
+EXP_POSSIBLE_UNDERFLOW:
+
+// Here if fMAX_SGL_ZERO_ARG < x < fMIN_SGL_NORM_ARG
+// Underflow is a possibility, not a certainty
+
+// We define an underflow as the case where the answer,
+// recomputed with ftz (flush-to-zero) set,
+// is zero (tiny numbers become zero)
+
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x41 // Get user's round mode, set ftz
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s.s2 fFtz_urm_f8 = fP, fT, fT // Result with ftz set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off ftz in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s1 p6, p7 = fFtz_urm_f8, f0 // Test for underflow: p6 = yes, p7 = no
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT // Compute result, set I, maybe U
+ nop.i 0
+}
+;;
+
+{ .mbb
+ nop.m 0
+(p6) br.cond.spnt EXP_UNDERFLOW_COMMON // Branch if really underflow
+(p7) br.ret.sptk b0 // Exit if really no underflow
+}
+;;
+
+EXP_CERTAIN_UNDERFLOW:
+// Here if x <= fMAX_SGL_ZERO_ARG
+// Result will be zero (or smallest denorm if round to +inf) with I, U set
+{ .mmi
+ mov rTmp = 1
+;;
+ setf.exp fTmp = rTmp // Form small normal
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.se fTmp = fTmp, f64DivLn2 // Small with non-trivial signif
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fTmp, fTmp, f0 // Set I,U, tiny (+0.0) result
+ br.cond.sptk EXP_UNDERFLOW_COMMON
+}
+;;
+
+EXP_UNDERFLOW_COMMON:
+// Determine if underflow result is zero or nonzero
+{ .mfi
+ alloc r32=ar.pfs,0,3,4,0
+ fcmp.eq.s1 p6, p0 = f8, f0 // p6 = result in f8 is zero
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fmerge.s FR_X = fNormX,fNormX // pass normalized x to the error handler
+(p6) br.cond.spnt EXP_UNDERFLOW_ZERO
+}
+;;
+
+EXP_UNDERFLOW_NONZERO:
+// Here if x < fMIN_SGL_NORM_ARG and result nonzero;
+// I, U are set
+{ .mfb
+ mov GR_Parameter_TAG = 17
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
+}
+;;
+
+EXP_UNDERFLOW_ZERO:
+// Here if x < fMIN_SGL_NORM_ARG and result zero;
+// I, U are set; same tag (17) as the nonzero case
+{ .mfb
+ mov GR_Parameter_TAG = 17
+ nop.f 0 // FR_RESULT already set
+ br.cond.sptk __libm_error_region
+}
+;;
+
+GLOBAL_IEEE754_END(expf)
+libm_hidden_def (__expf)
+libm_alias_float_other (__exp, exp)
+#ifdef SHARED
+.symver expf,expf@@GLIBC_2.27
+.weak __expf_compat
+.set __expf_compat,__expf
+.symver __expf_compat,expf@GLIBC_2.2
+#endif
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // recompute Parameter 3 address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_expl.c b/sysdeps/ia64/fpu/e_expl.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_expl.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_fmod.S b/sysdeps/ia64/fpu/e_fmod.S
new file mode 100644
index 0000000000..33aaaf6314
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_fmod.S
@@ -0,0 +1,555 @@
+.file "fmod.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//====================================================================
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/28/00 Set FR_Y to f9
+// 03/11/02 Fixed flags for fmod(qnan,zero)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
+//
+// API
+//====================================================================
+// double fmod(double,double);
+//
+// Overview of operation
+//====================================================================
+// fmod(a,b)=a-i*b,
+// where i is an integer such that, if b!=0,
+// |i|<=|a/b| and |a/b-i|<1
+//
+// Algorithm
+//====================================================================
+// a). if |a|<|b|, return a
+// b). get quotient and reciprocal overestimates accurate to
+// 33 bits (q2,y2)
+// c). if the exponent difference (exponent(a)-exponent(b))
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
+// round quotient estimate to single precision (k=RN(q2)),
+// calculate partial remainder (a'=a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/-Inf or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p11
+// General registers: r2,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36 // defined but not referenced in this file
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10 // stored as Parameter 1 by __libm_error_region
+FR_Y = f9 // stored as Parameter 2 by __libm_error_region
+FR_RESULT = f8 // stored as Parameter 3 by __libm_error_region
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(fmod)
+
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f6=|a|
+ fmerge.s f6=f0,f8
+ mov r2 = 0x0ffdd // biased exponent of 2^-34 (0xffff - 34)
+}
+ {.mfi
+ nop.m 0
+ // f7=|b|
+ fmerge.s f7=f0,f9
+ nop.i 0;;
+}
+
+{ .mfi
+ setf.exp f11 = r2 // f11 = 2^-34, used in step (5)
+ // (1) y0
+ frcpa.s1 f10,p6=f6,f7
+ nop.i 0
+}
+
+// Y +-NAN, +-inf, +-0? p7
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0xe7
+ nop.i 999;;
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999
+}
+
+// |x| < |y|? Return x p8
+{ .mfi
+ nop.m 999
+ fcmp.lt.unc.s1 p8,p0 = f6,f7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 0
+ // normalize y (if |x|<|y|)
+ (p8) fma.s0 f9=f9,f1,f0
+ nop.i 0;;
+}
+
+ { .mfi
+ mov r2=0x1001f // biased exponent of 2^32 (0xffff + 32)
+ // (2) q0=a*y0
+ (p6) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 f12=f7,f10,f1
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x (if |x|<|y|)
+ (p8) fma.d.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.bbb
+ (p9) br.cond.spnt FMOD_X_NAN_INF
+ (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
+ // if |x|<|y|, return
+ (p8) br.ret.spnt b0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x
+ fma.s0 f6=f6,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // normalize y
+ fma.s0 f7=f7,f1,f0
+ nop.i 0;;
+}
+
+ {.mfi
+ // f15=2^32
+ setf.exp f15=r2
+ // (4) q1=q0+e0*q0
+ (p6) fma.s1 f13=f12,f13,f13
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 f14=f12,f12,f11
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ movl r2=0x33a00000;; // single-precision bit pattern of 1.25*2^{-24}
+}
+{ .mfi
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 f10=f12,f10,f10
+ nop.i 0;;
+}
+{.mfi
+ // set f12=1.25*2^{-24}
+ setf.s f12=r2
+ // (7) q2=q1+e1*q1
+ (p6) fma.s1 f13=f13,f14,f13
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fmerge.s f9=f8,f9 // f9 = b with the sign of a
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 f10=f14,f10,f10
+ // set p6=0, p10=0
+ cmp.ne.and p6,p10=r0,r0;;
+}
+
+.align 32
+loop53:
+ {.mfi
+ nop.m 0
+ // compare q2, 2^32: p8 = final iteration (q2<2^32), p7 = keep reducing
+ fcmp.lt.unc.s1 p8,p7=f13,f15
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 f11=f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // if exponent>=32, round quotient to single precision (perform in advance)
+ fma.s.s1 f13=f13,f1,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // set f12=sgn(a)
+ (p8) fmerge.s f12=f8,f1
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf f13=f11
+ nop.i 0;;
+}
+ { .mfi
+ nop.m 0
+ // calculate remainder (assuming f13=RZ(Q))
+ (p7) fnma.s1 f14=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // also if exponent>=32, round quotient to single precision
+ // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 f11=f13,f12,f13
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 f11=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // (p7) calculate remainder (assuming f11=RZ(Q))
+ (p7) fnma.s1 f6=f11,f7,f6
+ nop.i 0;;
+}
+
+
+ {.mfi
+ nop.m 0
+ // Final iteration (p8): is f6 the correct remainder (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 f13=f14,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // was f14=RZ(Q) ? (then new remainder f14>=0)
+ (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
+ nop.b 0;;
+}
+
+
+.pred.rel "mutex",p6,p10
+ {.mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
+ (p6) fma.d.s0 f8=f11,f12,f9
+ nop.b 0
+}
+ {.mfb
+ nop.m 0
+ // calculate remainder (rounded to double precision by fma.d)
+ // set correct sign of result before returning
+ (p10) fma.d.s0 f8=f11,f12,f0
+ (p8) br.ret.sptk b0;;
+}
+ {.mfi
+ nop.m 0
+ // if f13!=RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // if f14 was RZ(Q), set remainder to f14
+ (p9) mov f6=f14
+ br.cond.sptk loop53;;
+}
+
+
+
+FMOD_X_NAN_INF:
+
+// X NaN? (p10)  Y zero? (p11, tested below on f10 = y)
+{.mfi
+ nop.m 0
+ fclass.m p10,p0=f8,0xc3 // Test x=nan
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0 // f10 = y, compared against zero below
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ fma.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt FMOD_Y_ZERO;;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+ fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ? (only tested when x is infinite; then p9 = y is NaN, p8 = y is not)
+{.mfi
+ nop.m 999
+(p8) fclass.m p9,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+(p8) mov GR_Parameter_TAG=274 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fma.d.s0 f8=f8,f1,f0
+(p8) br.spnt __libm_error_region;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
+}
+
+
+FMOD_Y_NAN_INF_ZERO:
+
+// Y INF? Then the result is x (rounded to double)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.d.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN? Then the result is y (rounded to double)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+FMOD_Y_ZERO:
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+{.mfi
+ nop.m 0
+ // set Invalid
+ frcpa.s0 f12,p0=f0,f0
+ nop.i 0
+}
+// X NAN?
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff // NOTE(review): presumably routes NaT/unsupported x to the p9 path - confirm
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa.s0 f11,p7=f8,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa.s0 f11,p7 = f9,f9
+ mov GR_Parameter_TAG = 121 ;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8, f8 // f10 (FR_X) = x, saved before f8 is overwritten
+ nop.i 999
+}
+
+{ .mfb
+ nop.m 999
+ fma.d.s0 f8=f11,f1,f0
+ br.sptk __libm_error_region;;
+}
+
+GLOBAL_IEEE754_END(fmod)
+libm_alias_double_other (__fmod, fmod)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // recompute Parameter 3 address after the call
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_fmodf.S b/sysdeps/ia64/fpu/e_fmodf.S
new file mode 100644
index 0000000000..06e3cc4180
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_fmodf.S
@@ -0,0 +1,569 @@
+.file "fmodf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//====================================================================
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/28/00 Set FR_Y to f9
+// 03/11/02 Fixed flags for fmodf(qnan,zero)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/28/03 Fix: fmod(sNaN,0) no longer sets errno
+//
+// API
+//====================================================================
+// float fmodf(float,float);
+//
+// Overview of operation
+//====================================================================
+// fmod(a,b)=a-i*b,
+// where i is an integer such that, if b!=0,
+// |i|<=|a/b| and |a/b-i|<1
+
+// Algorithm
+//====================================================================
+// a). if |a|<|b|, return a
+// b). get quotient and reciprocal overestimates accurate to
+// 33 bits (q2,y2)
+// c). if the exponent difference (exponent(a)-exponent(b))
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>=32 (q2>=2^32)
+// round quotient estimate to single precision (k=RN(q2)),
+// calculate partial remainder (a'=a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
+
+// Special cases
+//====================================================================
+// a=+/-Inf or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+
+// Registers used
+//====================================================================
+// Predicate registers: p6-p11
+// General registers: r2, r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15
+// Register aliases; the alloc in fmodf (1 input, 4 locals, 4 outputs) makes r33-r36 locals and r37-r40 outputs.
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+// GR_SAVE_SP above is not referenced in this routine. r37-r40: arguments set up for __libm_error_support.
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+// Floating-point values that __libm_error_region stores to the stack
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(fmodf)
+// Entry: classify the inputs, dispatch special cases, and build the quotient (f13) and reciprocal (f10) estimates.
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f6=|a|
+ fmerge.s f6=f0,f8
+ mov r2 = 0x0ffdd // exponent field for the 2^-34 term of step (5)
+}
+ {.mfi
+ nop.m 0
+ // f7=|b|
+ fmerge.s f7=f0,f9
+ nop.i 0;;
+}
+
+{ .mfi
+ setf.exp f11 = r2
+ // (1) y0
+ frcpa.s1 f10,p6=f6,f7
+ nop.i 0
+}
+
+// eliminate special cases
+// Y +-NAN, +-inf, +-0? p7
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0xe7
+ nop.i 999;;
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999
+}
+
+// |x| < |y|? Return x p8
+{ .mfi
+ nop.m 999
+ fcmp.lt.unc.s1 p8,p0 = f6,f7
+ nop.i 999 ;;
+}
+
+{ .mfi
+ nop.m 0
+ // normalize y (if |x|<|y|)
+ (p8) fma.s0 f9=f9,f1,f0
+ nop.i 0;;
+}
+
+ { .mfi
+ mov r2=0x1001f // exponent field for 2^32 (loaded into f15 below)
+ // (2) q0=a*y0
+ (p6) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 f12=f7,f10,f1
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x (if |x|<|y|)
+ (p8) fma.s.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.bbb
+ (p9) br.cond.spnt FMOD_X_NAN_INF
+ (p7) br.cond.spnt FMOD_Y_NAN_INF_ZERO
+ // if |x|<|y|, return
+ (p8) br.ret.spnt b0;;
+}
+
+ {.mfi
+ nop.m 0
+ // normalize x
+ fma.s0 f6=f6,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // normalize y
+ fma.s0 f7=f7,f1,f0
+ nop.i 0;;
+}
+
+
+ {.mfi
+ // f15=2^32
+ setf.exp f15=r2
+ // (4) q1=q0+e0*q0
+ (p6) fma.s1 f13=f12,f13,f13
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 f14=f12,f12,f11
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ movl r2=0x33a00000;; // single-precision bit pattern of 1.25*2^-24
+}
+{ .mfi
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 f10=f12,f10,f10
+ nop.i 0;;
+}
+{.mfi
+ // set f12=1.25*2^{-24}
+ setf.s f12=r2
+ // (7) q2=q1+e1*q1
+ (p6) fma.s1 f13=f13,f14,f13
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fmerge.s f9=f8,f9 // f9 = magnitude of b with the sign of a
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 f10=f14,f10,f10
+ // set p6=0, p10=0
+ cmp.ne.and p6,p10=r0,r0;;
+}
+
+.align 32
+loop24:
+ {.mfi
+ nop.m 0
+ // compare q2, 2^32
+ fcmp.lt.unc.s1 p8,p7=f13,f15
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 f11=f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // if exponent>32, round quotient to single precision (perform in advance)
+ fma.s.s1 f13=f13,f1,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // set f12=sgn(a)
+ (p8) fmerge.s f12=f8,f1
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf f13=f11
+ nop.i 0;;
+}
+ { .mfi
+ nop.m 0
+ // calculate remainder (assuming f13=RZ(Q))
+ (p7) fnma.s1 f14=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // also if exponent>32, round quotient to single precision
+ // and subtract 1 ulp: q=q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 f11=f13,f12,f13
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 f11=f13,f7,f6
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // (p7) calculate remainder (assuming f11=RZ(Q))
+ (p7) fnma.s1 f6=f11,f7,f6
+ nop.i 0;;
+}
+
+
+ {.mfi
+ nop.m 0
+ // Final iteration (p8): is f11 the correct remainder (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6,p10=f11,f0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 f13=f14,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // was the rounded quotient RZ(Q) ? (then new remainder f14>=0)
+ (p7) fcmp.lt.unc.s1 p7,p9=f14,f0
+ nop.b 0;;
+}
+
+
+.pred.rel "mutex",p6,p10
+ {.mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was overestimated)
+ // also set correct sign by using f9=|b|*sgn(a), f12=sgn(a)
+ (p6) fma.s.s0 f8=f11,f12,f9
+ nop.b 0
+}
+ {.mfb
+ nop.m 0
+ // calculate remainder (single precision)
+ // set correct sign of result before returning
+ (p10) fma.s.s0 f8=f11,f12,f0
+ (p8) br.ret.sptk b0;;
+}
+ {.mfi
+ nop.m 0
+ // if the rounded quotient was not RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 f13=f6,f10,f0
+ nop.i 0
+}
+ {.mfb
+ nop.m 0
+ // if the rounded quotient was RZ(Q), set remainder to f14
+ (p9) mov f6=f14
+ br.cond.sptk loop24;;
+}
+// Not reached: the loop branch above is unpredicated and no label precedes this bundle.
+ { .mmb
+ nop.m 0
+ nop.m 0
+ br.ret.sptk b0;;
+ }
+
+FMOD_X_NAN_INF:
+// Reached from the entry dispatch when x is NaN or Inf (p9).
+
+// X NaN and Y zero ? Then return x without calling the error handler
+{.mfi
+ nop.m 0
+ fclass.m p10,p0=f8,0xc3 // Test x=nan
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0 // f10 = normalized y, used only for the zero test below
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ fma.s0 f8=f8,f1,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+(p10) fclass.m p10,p0=f9,0x07 // Test x=nan, and y=zero
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+(p10) br.ret.spnt b0;; // Exit with result=x if x=nan and y=zero
+}
+{.mfb
+ nop.m 0
+ fmerge.s f10=f8,f8 // FR_X = x: f10 held y until here, and the error region stores f10 as Parameter 1
+ // if Y zero
+ (p11) br.cond.spnt FMOD_Y_ZERO;;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+ fclass.m.unc p8,p9 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ? After this test p8 = (x is Inf and y is not NaN), p9 = everything else
+{.mfi
+ nop.m 999
+(p8) fclass.m p9,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p0 = f8,f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+(p8) mov GR_Parameter_TAG=275 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p8) fma.s.s0 f8=f8,f1,f0
+(p8) br.spnt __libm_error_region;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) frcpa.s0 f8,p7=f8,f9
+ br.ret.sptk b0 ;;
+}
+
+
+FMOD_Y_NAN_INF_ZERO:
+// Reached when y is NaN, Inf or zero; the x NaN/Inf branch is tested first, so x is neither here.
+// Y INF? Then the result is x
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN? Then the result is y; otherwise fall through to FMOD_Y_ZERO
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.s.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+FMOD_Y_ZERO:
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+{.mfi
+ nop.m 0
+ // set Invalid
+ frcpa.s0 f12,p0=f0,f0
+ nop.i 999
+}
+// X NAN? (p9 = x is NaN, p10 = x is not NaN)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa.s0 f11,p7=f8,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa.s0 f11,p7 = f0,f0
+nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8, f8 // FR_X = x for the error region
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s.s0 f8=f11,f1,f0 // FR_RESULT = f11 rounded to single
+ nop.i 999;;
+}
+
+EXP_ERROR_RETURN:
+// NOTE(review): nothing in this routine branches to EXP_ERROR_RETURN; it is only reached by fall-through.
+
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG=122
+ br.sptk __libm_error_region;;
+}
+
+GLOBAL_IEEE754_END(fmodf)
+libm_alias_float_other (__fmod, fmod)
+// Error exit: store FR_Y, FR_X and FR_RESULT as singles in a 64-byte frame and call __libm_error_support.
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32, then advance pointer to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0 // Branch slot unused
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support#;; // Call error handling function
+}
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_fmodl.S b/sysdeps/ia64/fpu/e_fmodl.S
new file mode 100644
index 0000000000..c493a93400
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_fmodl.S
@@ -0,0 +1,682 @@
+.file "fmodl.s"
+
+
+// Copyright (c) 2000 - 2004, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//====================================================================
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [ the previously overwritten ] GR_Parameter_RESULT.
+// 11/28/00 Set FR_Y to f9
+// 03/11/02 Fixed flags for fmodl(qnan, zero)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header:.section,.global,.proc,.align
+// 04/28/03 Fix: fmod(sNaN, 0) no longer sets errno
+// 11/23/04 Reformatted routine and improved speed
+//
+// API
+//====================================================================
+// long double fmodl(long double, long double);
+//
+// Overview of operation
+//====================================================================
+// fmod(a, b)= a-i*b,
+// where i is an integer such that, if b!= 0,
+// |i|<=|a/b| and |a/b-i|<1
+//
+// Algorithm
+//====================================================================
+// a). if |a|<|b|, return a
+// b). get quotient and reciprocal overestimates accurate to
+// 33 bits (q2, y2)
+// c). if the exponent difference (exponent(a)-exponent(b))
+// is less than 32, truncate quotient to integer and
+// finish in one iteration
+// d). if exponent(a)-exponent(b)>= 32 (q2>= 2^32)
+// round quotient estimate to single precision (k= RN(q2)),
+// calculate partial remainder (a'= a-k*b),
+// get quotient estimate (a'*y2), and repeat from c).
+//
+// Registers used
+//====================================================================
+// NOTE(review): GR_SMALLBIASEXP is bound twice below (r2, then r20); presumably the later r20 binding is the one in effect - confirm.
+GR_SMALLBIASEXP = r2
+GR_2P32 = r3
+GR_SMALLBIASEXP = r20
+GR_ROUNDCONST = r21
+GR_SIG_B = r22
+GR_ARPFS = r23
+GR_TMP1 = r24
+GR_TMP2 = r25
+GR_TMP3 = r26
+// r33-r40 below are only touched after the alloc that the error paths issue; the fmodl entry itself has no alloc.
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+// Arguments set up for __libm_error_support
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+// Floating-point values that __libm_error_region stores to the stack
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+// Working registers of the main algorithm; note that FR_Y_INV shares f10 with FR_X.
+FR_ABS_A = f6
+FR_ABS_B = f7
+FR_Y_INV = f10
+FR_SMALLBIAS = f11
+FR_E0 = f12
+FR_Q = f13
+FR_E1 = f14
+FR_2P32 = f15
+FR_TMPX = f32
+FR_TMPY = f33
+FR_ROUNDCONST = f34
+FR_QINT = f35
+FR_QRND24 = f36
+FR_NORM_B = f37
+FR_TMP = f38
+FR_TMP2 = f39
+FR_DFLAG = f40
+FR_Y_INV0 = f41
+FR_Y_INV1 = f42
+FR_Q0 = f43
+FR_Q1 = f44
+FR_QINT_Z = f45
+FR_QREM = f46
+FR_B_SGN_A = f47
+
+.section .text
+GLOBAL_IEEE754_ENTRY(fmodl)
+// Entry: classify the inputs, dispatch special cases, and build the quotient (FR_Q) and reciprocal (FR_Y_INV) estimates.
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ getf.sig GR_SIG_B = f9
+ // FR_ABS_A = |a|
+ fmerge.s FR_ABS_A = f0, f8
+ mov GR_SMALLBIASEXP = 0x0ffdd // exponent field for the 2^-34 term of step (5)
+}
+{ .mfi
+ nop.m 0
+ // FR_ABS_B = |b|
+ fmerge.s FR_ABS_B = f0, f9
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp FR_SMALLBIAS = GR_SMALLBIASEXP
+ // (1) y0
+ frcpa.s1 FR_Y_INV0, p6 = FR_ABS_A, FR_ABS_B
+ nop.i 0
+}
+;;
+
+{ .mlx
+ nop.m 0
+ movl GR_ROUNDCONST = 0x33a00000 // single-precision bit pattern of 1.25*2^-24
+}
+;;
+
+// eliminate special cases
+{ .mmi
+ nop.m 0
+ nop.m 0
+ // y pseudo-zero ?
+ cmp.eq p7, p10 = GR_SIG_B, r0
+}
+;;
+
+// set p7 if b +/-NAN, +/-inf, +/-0
+{ .mfi
+ nop.m 0
+ (p10) fclass.m p7, p10 = f9, 0xe7
+ nop.i 0
+}
+;;
+
+{ .mfi
+ mov GR_2P32 = 0x1001f
+ // (2) q0 = a*y0
+ (p6) fma.s1 FR_Q0 = FR_ABS_A, FR_Y_INV0, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (3) e0 = 1 - b * y0
+ (p6) fnma.s1 FR_E0 = FR_ABS_B, FR_Y_INV0, f1
+ nop.i 0
+}
+;;
+
+// set p9 if a +/-NAN, +/-inf
+{ .mfi
+ nop.m 0
+ fclass.m.unc p9, p11 = f8, 0xe3
+ nop.i 0
+}
+ // |a| < |b|? Return a, p8=1
+{ .mfi
+ nop.m 0
+ (p10) fcmp.lt.unc.s1 p8, p0 = FR_ABS_A, FR_ABS_B
+ nop.i 0
+}
+;;
+
+// also set p7 if b matches none of the fclass 0xff classes
+{ .mfi
+ nop.m 0
+ // pseudo-NaN ?
+ (p10) fclass.nm p7, p0 = f9, 0xff
+ nop.i 0
+}
+;;
+
+// also set p9 if a matches none of the fclass 0xff classes
+{ .mfi
+ nop.m 0
+ (p11) fclass.nm p9, p0 = f8, 0xff
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // b denormal ? set D flag (if |a|<|b|)
+ (p8) fnma.s0 FR_DFLAG = f9, f1, f9
+ nop.i 0
+}
+;;
+
+{ .mfi
+ // FR_2P32 = 2^32
+ setf.exp FR_2P32 = GR_2P32
+ // (4) q1 = q0+e0*q0
+ (p6) fma.s1 FR_Q1 = FR_E0, FR_Q0, FR_Q0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (5) e1 = e0 * e0 + 2^-34
+ (p6) fma.s1 FR_E1 = FR_E0, FR_E0, FR_SMALLBIAS
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // normalize a (if |a|<|b|)
+ (p8) fma.s0 f8 = f8, f1, f0
+ nop.i 0
+}
+{ .bbb
+ (p9) br.cond.spnt FMOD_A_NAN_INF
+ (p7) br.cond.spnt FMOD_B_NAN_INF_ZERO
+ // if |a|<|b|, return
+ (p8) br.ret.spnt b0
+}
+;;
+
+
+{ .mfi
+ nop.m 0
+ // (6) y1 = y0 + e0 * y0
+ (p6) fma.s1 FR_Y_INV1 = FR_E0, FR_Y_INV0, FR_Y_INV0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // a denormal ? set D flag
+ // b denormal ? set D flag
+ fcmp.eq.s0 p12,p0 = FR_ABS_A, FR_ABS_B
+ nop.i 0
+}
+{ .mfi
+ // set FR_ROUNDCONST = 1.25*2^{-24}
+ setf.s FR_ROUNDCONST = GR_ROUNDCONST
+ // (7) q2 = q1+e1*q1
+ (p6) fma.s1 FR_Q = FR_Q1, FR_E1, FR_Q1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.s FR_B_SGN_A = f8, f9 // magnitude of b with the sign of a
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (8) y2 = y1 + e1 * y1
+ (p6) fma.s1 FR_Y_INV = FR_E1, FR_Y_INV1, FR_Y_INV1
+ // set p6 = 0, p10 = 0
+ cmp.ne.and p6, p10 = r0, r0
+}
+;;
+
+// will compute integer quotient bits (24 bits per iteration)
+.align 32
+loop64:
+{ .mfi
+ nop.m 0
+ // compare q2, 2^32
+ fcmp.lt.unc.s1 p8, p7 = FR_Q, FR_2P32
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // will truncate quotient to integer, if exponent<32 (in advance)
+ fcvt.fx.trunc.s1 FR_QINT = FR_Q
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // if exponent>32 round quotient to single precision (perform in advance)
+ fma.s.s1 FR_QRND24 = FR_Q, f1, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // final iteration only: reuse FR_ROUNDCONST to hold sgn(a)
+ (p8) fmerge.s FR_ROUNDCONST = f8, f1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // normalize truncated quotient
+ (p8) fcvt.xf FR_QRND24 = FR_QINT
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // calculate remainder (assuming FR_QRND24 = RZ(Q))
+ (p7) fnma.s1 FR_E1 = FR_QRND24, FR_ABS_B, FR_ABS_A
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // also if exponent>32, round quotient to single precision
+ // and subtract 1 ulp: q = q-q*(1.25*2^{-24})
+ (p7) fnma.s.s1 FR_QINT_Z = FR_QRND24, FR_ROUNDCONST, FR_QRND24
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // (p8) calculate remainder (82-bit format)
+ (p8) fnma.s1 FR_QREM = FR_QRND24, FR_ABS_B, FR_ABS_A
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (p7) calculate remainder (assuming FR_QINT_Z = RZ(Q))
+ (p7) fnma.s1 FR_ABS_A = FR_QINT_Z, FR_ABS_B, FR_ABS_A
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // Final iteration (p8): is FR_QREM the correct remainder
+ // (quotient was not overestimated) ?
+ (p8) fcmp.lt.unc.s1 p6, p10 = FR_QREM, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // get new quotient estimation: a'*y2
+ (p7) fma.s1 FR_Q = FR_E1, FR_Y_INV, f0
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // was FR_QRND24 = RZ(Q) ? (then new remainder FR_E1 >= 0)
+ (p7) fcmp.lt.unc.s1 p7, p9 = FR_E1, f0
+ nop.b 0
+}
+;;
+
+.pred.rel "mutex", p6, p10
+{ .mfb
+ nop.m 0
+ // add b to estimated remainder (to cover the case when the quotient was
+ // overestimated)
+ // also set correct sign by using
+ // FR_B_SGN_A = |b|*sgn(a), FR_ROUNDCONST = sgn(a)
+ (p6) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, FR_B_SGN_A
+ nop.b 0
+}
+{ .mfb
+ nop.m 0
+ // set correct sign of result before returning: FR_ROUNDCONST = sgn(a)
+ (p10) fma.s0 f8 = FR_QREM, FR_ROUNDCONST, f0
+ (p8) br.ret.sptk b0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // if FR_QRND24 != RZ(Q), get alternative quotient estimation: a''*y2
+ (p7) fma.s1 FR_Q = FR_ABS_A, FR_Y_INV, f0
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ // if FR_QRND24 was RZ(Q), set remainder to FR_E1
+ (p9) fma.s1 FR_ABS_A = FR_E1, f1, f0
+ br.cond.sptk loop64
+}
+;;
+
+FMOD_A_NAN_INF:
+// Reached from the entry dispatch when a is NaN or Inf (p9).
+// a NaN and b zero ? Then return a without calling the error handler
+{ .mfi
+ nop.m 0
+ fclass.m p10, p0 = f8, 0xc3 // Test a = nan
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_NORM_B = f9, f1, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s0 f8 = f8, f1, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ (p10) fclass.m p10, p0 = f9, 0x07 // Test a = nan, and b = zero
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fcmp.eq.unc.s1 p11, p0 = FR_NORM_B, f0
+ (p10) br.ret.spnt b0 // Exit with result = a if a = nan and b = zero
+}
+;;
+
+{ .mib
+ nop.m 0
+ nop.i 0
+ // if b zero
+ (p11) br.cond.spnt FMOD_B_ZERO
+}
+;;
+
+// a= infinity? Return QNAN indefinite
+{ .mfi
+ // set p7 to 0
+ cmp.ne p7, p0 = r0, r0
+ fclass.m.unc p8, p9 = f8, 0x23
+ nop.i 0
+}
+;;
+
+// b NaN ? After this test p8 = (a is Inf and b is not NaN), p9 = everything else
+{ .mfi
+ nop.m 0
+ (p8) fclass.m p9, p8 = f9, 0xc3
+ nop.i 0
+}
+;;
+
+// b not pseudo-zero ? (GR_SIG_B holds significand)
+{ .mii
+ nop.m 0
+ (p8) cmp.ne p7, p0 = GR_SIG_B, r0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8, f8
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ (p8) frcpa.s0 f8, p0 = f8, f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // also set Denormal flag if necessary; write to FR_DFLAG so f9 (FR_Y) keeps b for the error region
+ (p7) fnma.s0 FR_DFLAG = f9, f1, f9
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ (p8) fma.s0 f8 = f8, f1, f0
+ nop.b 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ (p9) frcpa.s0 f8, p7 = f8, f9
+ (p9) br.ret.sptk b0
+}
+{ .mmb
+ alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0
+ mov GR_Parameter_TAG = 273
+ br.sptk __libm_error_region
+}
+;;
+
+FMOD_B_NAN_INF_ZERO:
+// b INF? Then the result is a (the a NaN/Inf branch is tested first, so a is neither here)
+{ .mfi
+ nop.m 0
+ fclass.m.unc p7, p0 = f9, 0x23
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ (p7) fma.s0 f8 = f8, f1, f0
+ (p7) br.ret.spnt b0
+}
+;;
+
+// b NAN, or b in none of the fclass 0xff classes? Then the result is b; otherwise fall through to FMOD_B_ZERO
+{ .mfi
+ nop.m 0
+ fclass.m.unc p9, p10 = f9, 0xc3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ (p10) fclass.nm p9, p0 = f9, 0xff
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ (p9) fma.s0 f8 = f9, f1, f0
+ (p9) br.ret.spnt b0
+}
+;;
+
+FMOD_B_ZERO:
+// b zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+{ .mfi
+ nop.m 0
+ // set Invalid
+ frcpa.s0 FR_TMP, p0 = f0, f0
+ nop.i 0
+}
+;;
+
+// a NAN? (p9 = a is NaN, p10 = a is not NaN)
+{ .mfi
+ nop.m 0
+ fclass.m.unc p9, p10 = f8, 0xc3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ alloc GR_ARPFS = ar.pfs, 1, 4, 4, 0
+ (p10) fclass.nm p9, p10 = f8, 0xff
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ (p9) frcpa.s0 FR_TMP2, p7 = f8, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ (p10) frcpa.s0 FR_TMP2, p7 = f9, f9
+ mov GR_Parameter_TAG = 120
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8, f8 // FR_X = a for the error region
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s0 f8 = FR_TMP2, f1, f0 // FR_RESULT = the frcpa result computed above
+ br.sptk __libm_error_region
+}
+;;
+
+GLOBAL_IEEE754_END(fmodl)
+libm_alias_ldouble_other (__fmod, fmod)
+// Error exit: store FR_Y, FR_X and FR_RESULT with stfe in a 64-byte frame and call __libm_error_support.
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y = -32, sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs, GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp = -64, sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP = gp // Save gp
+}
+;;
+
+{ .mmi
+ stfe [ GR_Parameter_Y ] = FR_Y, 16 // Store Parameter 2 at sp+32, then advance pointer to sp+48
+ add GR_Parameter_X = 16, sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0 = b0 // Save b0
+}
+;;
+
+.body
+{ .mib
+ stfe [ GR_Parameter_X ] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0, GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0 // Branch slot unused
+}
+{ .mib
+ stfe [ GR_Parameter_Y ] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16, GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0 = __libm_error_support# // Call error handling function
+}
+;;
+
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48, sp // Recompute the result address after the call
+}
+;;
+
+{ .mmi
+ ldfe f8 = [ GR_Parameter_RESULT ] // Get return result off stack
+.restore sp
+ add sp = 64, sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+}
+;;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+}
+;;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#, @function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_gamma_r.c b/sysdeps/ia64/fpu/e_gamma_r.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_gamma_r.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_gammaf_r.c b/sysdeps/ia64/fpu/e_gammaf_r.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_gammaf_r.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_gammal_r.c b/sysdeps/ia64/fpu/e_gammal_r.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_gammal_r.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_hypot.S b/sysdeps/ia64/fpu/e_hypot.S
new file mode 100644
index 0000000000..19442ff9c9
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_hypot.S
@@ -0,0 +1,438 @@
+.file "hypot.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 02/02/00 hand-optimized
+// 04/04/00 Unwind support added
+// 06/20/00 new version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Added missing mutex directive
+//
+//*********************************************************************
+//
+// Function: hypot(x,y) = sqrt(x^2 + y^2) for double precision values
+// x and y
+// Also provides cabs functionality.
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9 (Input)
+// f6 -f15, f32-f34
+//
+// General Purpose Registers:
+// r2,r3,r29 (Scratch)
+// r32-r35 (Locals)
+// r36-r39 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6 - p10
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// Overflow can occur.
+// hypot(Infinity and anything) = +Infinity
+// hypot(QNaN and anything) = QNaN
+// hypot(SNaN and anything ) = QNaN
+//
+//*********************************************************************
+//
+// Implementation:
+// x2 = x * x in double-extended
+// y2 = y * y in double-extended
+// temp = x2 + y2 in double-extended
+// sqrt(temp) rounded to double
+//
+//*********************************************************************
+// Register aliases; the alloc in hypot (0 inputs, 4 locals, 4 outputs) makes r32-r35 locals and r36-r39 outputs.
+GR_SAVE_PFS = r33
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+GR_Parameter_X = r36
+GR_Parameter_Y = r37
+GR_Parameter_RESULT = r38
+GR_Parameter_TAG = r39
+// FR_X and FR_Y are the copies of the inputs that hypot makes in f32 and f33 at entry.
+FR_X = f32
+FR_Y = f33
+FR_RESULT = f8
+
+.section .text
+
+LOCAL_LIBM_ENTRY(cabs)
+LOCAL_LIBM_END(cabs)
+// The cabs procedure above is empty; the code for both starts at the hypot entry below.
+GLOBAL_IEEE754_ENTRY(hypot)
+// Computes a = x*x + y*y, refines frsqrta(a) with a polynomial in d, and checks the double result for overflow.
+{.mfi
+ alloc r32= ar.pfs,0,4,4,0
+ // Compute x*x
+ fma.s1 f10=f8,f8,f0
+ // r2=bias-1
+ mov r2=0xfffe
+}
+{.mfi
+ // 63/8
+ mov r3=0x40fc //0000
+ // y*y
+ fma.s1 f11=f9,f9,f0
+ // r29=429/16
+ mov r29=0x41d68;; //000
+}
+
+{ .mfi
+ nop.m 0
+// Check if x is an Inf - if so return Inf even
+// if y is a NaN (C9X)
+ fclass.m.unc p7, p6 = f8, 0x023
+ shl r3=r3,16
+}
+{.mfi
+ nop.m 0
+ // if possible overflow, copy f8 to f32
+ // set Denormal, if necessary
+ // (p8)
+ fma.d.s0 f32=f8,f1,f0
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+// Check if y is an Inf - if so return Inf even
+// if x is a NaN (C9X)
+ fclass.m.unc p8, p9 = f9, 0x023
+ shl r29=r29,12
+}
+{ .mfb
+ // f7=0.5
+ setf.exp f7=r2
+// For x=inf, multiply y by 1 to raise invalid on y an SNaN
+// (p7) fma.s0 f9=f9,f1,f0
+ // copy f9 to f33; set Denormal, if necessary
+ fma.d.s0 f33=f9,f1,f0
+ nop.b 0;;
+}
+{.mfb
+ // f13=63/8
+ setf.s f13=r3
+ // is y Zero ?
+ (p6) fclass.m p6,p0=f9,0x7
+ nop.b 0
+}
+{.mlx
+ nop.m 0
+ movl r2=0x408c0000;; // single-precision bit pattern of 35/8 (loaded into f9 below)
+}
+
+{.mfi
+ // f34=429/16
+ setf.s f34=r29
+ // is x Zero ?
+ (p9) fclass.m p9,p0=f8,0x7
+ // 231/16
+ mov r3=0x4167;; //0000
+}
+{.mfi
+ nop.m 0
+ // a=x2+y2
+ fma.s1 f12=f10,f1,f11
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // y not NaN ?
+ (p9) fclass.m p8,p0=f9,0x3f
+ shl r3=r3,16
+}
+{.mfi
+ nop.m 0
+ // f6=2
+ fma.s1 f6=f1,f1,f1
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // x not NaN ?
+ (p6) fclass.m p7,p0=f8,0x3f
+ nop.i 0;;
+}
+{.mfi
+ // f9=35/8
+ setf.s f9=r2
+ nop.f 0
+ // 2*emax-2
+ mov r2=0x107fb;;
+}
+
+.pred.rel "mutex",p7,p8
+{.mfb
+ nop.m 0
+ // if f8=Infinity or f9=Zero, return |f8|
+ (p7) fmerge.s f8=f0,f32
+ (p7) br.ret.spnt b0
+}
+{.mfb
+ nop.m 0
+ // if f9=Infinity or f8=Zero, return |f9|
+ (p8) fmerge.s f8=f0,f33
+ (p8) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ // f10 =231/16
+ setf.s f10=r3
+ // z0=frsqrta(a)
+ frsqrta.s1 f8,p6=f12
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Identify Natvals, Infs, NaNs, and Zeros
+// and return result
+ fclass.m.unc p7, p0 = f12, 0x1E7
+ nop.i 0;;
+}
+{.mfb
+ // get exponent of x^2+y^2
+ getf.exp r3=f12
+ // if special case, set f8
+ (p7) mov f8=f12
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // S0=a*z0
+ (p6) fma.s1 f14=f12,f8,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H0=0.5*z0
+ (p6) fma.s1 f15=f8,f7,f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // f6=5/2
+ fma.s1 f6=f7,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f11=3/2
+ fma.s1 f11=f7,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // d=0.5-S0*H0
+ (p6) fnma.s1 f7=f14,f15,f7
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P67=231/16+429/16*d
+ (p6) fma.s1 f10=f34,f7,f10
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P45=63/8*d+35/8
+ (p6) fma.s1 f9=f13,f7,f9
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P23=5/2*d+3/2
+ (p6) fma.s1 f11=f6,f7,f11
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // d2=d*d
+ (p6) fma.s1 f13=f7,f7,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P47=d2*P67+P45
+ (p6) fma.s1 f10=f10,f13,f9
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P13=d*P23+1
+ (p6) fma.s1 f11=f11,f7,f1
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // d3=d2*d
+ (p6) fma.s1 f13=f13,f7,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // T0=d*S0
+ (p6) fma.s1 f15=f7,f14,f0
+ nop.i 0
+}
+{.mfi
+ // Is x^2 + y^2 well less than the overflow
+ // threshold?
+ (p6) cmp.lt.unc p7, p8 = r3,r2
+ // P=P13+d3*P47
+ (p6) fma.s1 f10=f13,f10,f11
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // S=P*T0+S0
+ fma.d.s0 f8=f10,f15,f14
+ // No overflow in this case
+ (p7) br.ret.sptk b0;;
+}
+
+{ .mfi
+ nop.m 0
+(p8) fsetc.s2 0x7F,0x42
+ // Possible overflow path, must detect by
+ // Setting widest range exponent with prevailing
+ // rounding mode.
+ nop.i 0 ;;
+}
+
+
+{ .mfi
+ // bias+0x400 (bias+EMAX+1)
+ (p8) mov r2=0x103ff
+ // S=P*T0+S0
+ (p8) fma.d.s2 f12=f10,f15,f14
+ nop.i 0 ;;
+}
+{ .mfi
+(p8) setf.exp f11 = r2
+(p8) fsetc.s2 0x7F,0x40
+// Restore Original Mode in S2
+ nop.i 0 ;;
+}
+{ .mfi
+ nop.m 0
+(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11 // p9: widest-range result is below 2^(EMAX+1)
+ nop.i 0 ;;
+}
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG = 46 // error tag used if the branch below is not taken
+ // No overflow
+(p9) br.ret.sptk b0;; // otherwise execution presumably continues into __libm_error_region below
+}
+GLOBAL_IEEE754_END(hypot)
+libm_alias_double_other (__hypot, hypot)
+// Error exit (reached by fall-through from hypot): store FR_Y, FR_X and FR_RESULT in a 64-byte frame and call __libm_error_support.
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32, then advance pointer to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0 // Branch slot unused
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute the result address after the call
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+LOCAL_LIBM_END(__libm_error_region#)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_hypotf.S b/sysdeps/ia64/fpu/e_hypotf.S
new file mode 100644
index 0000000000..58e93984ae
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_hypotf.S
@@ -0,0 +1,394 @@
+.file "hypotf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 02/02/00 hand-optimized
+// 04/04/00 Unwind support added
+// 06/26/00 new version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/17/03 Added missing mutex directive
+//
+//*********************************************************************
+//
+// Function: hypotf(x,y) = sqrt(x^2 + y^2) for single precision values
+// x and y
+// Also provides cabsf functionality.
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9 (Input)
+// f6 -f15
+//
+// General Purpose Registers:
+// r2-r3 (Scratch)
+// r32-r35 (Locals)
+// r36-r39 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6 - p10
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// Overflow can occur.
+// hypotf(Infinity and anything) = +Infinity
+// hypotf(QNaN and anything) = QNaN
+// hypotf(SNaN and anything ) = QNaN
+//
+//*********************************************************************
+//
+// Implementation:
+// x2 = x * x in double-extended
+// y2 = y * y in double-extended
+// temp = x2 + y2 in double-extended
+// sqrt(temp) rounded to single precision
+//
+//*********************************************************************
+
+GR_SAVE_PFS = r33 // Holds ar.pfs while the error path makes its call
+GR_SAVE_B0 = r34 // Holds the return address b0 across that call
+GR_SAVE_GP = r35 // Holds gp across that call
+GR_Parameter_X = r36 // Address of the stored x argument
+GR_Parameter_Y = r37 // Address of the stored y argument
+GR_Parameter_RESULT = r38 // Address of the stored result
+GR_Parameter_TAG = r39 // Error tag (47 in this file)
+
+FR_X = f14 // Single-precision copy of x (made from f8 in hypotf)
+FR_Y = f15 // Single-precision copy of y (made from f9 in hypotf)
+FR_RESULT = f8 // Result register
+
+.section .text
+
+LOCAL_LIBM_ENTRY(cabsf)
+LOCAL_LIBM_END(cabsf)
+// cabsf above is an empty local entry placed directly in front of hypotf (header: Also provides cabsf functionality)
+GLOBAL_IEEE754_ENTRY(hypotf)
+{.mfi
+ alloc r32= ar.pfs,0,4,4,0 // 0 inputs, 4 locals (r32-r35), 4 outputs (r36-r39)
+ // Compute x*x
+ fma.s1 f10=f8,f8,f0
+ // r2=bias-1
+ mov r2=0xfffe
+}
+{.mfi
+ nop.m 0
+ // y*y
+ fma.s1 f11=f9,f9,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Check if x is an Inf - if so return Inf even
+// if y is a NaN (C9X)
+ fclass.m.unc p7, p6 = f8, 0x023
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // if possible overflow, copy f8 to f14
+ // set Denormal, if necessary
+ // (p8)
+ fma.s.s0 f14=f8,f1,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Check if y is an Inf - if so return Inf even
+// if x is a NaN (C9X)
+ fclass.m.unc p8, p9 = f9, 0x023
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+// For x=inf, multiply y by 1 to raise invalid on y an SNaN
+// (p7) fma.s0 f9=f9,f1,f0
+ // copy f9 to f15; set Denormal, if necessary
+ fma.s.s0 f15=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is y Zero ?
+ (p6) fclass.m p6,p0=f9,0x7
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is x Zero ?
+ (p9) fclass.m p9,p0=f8,0x7
+ nop.i 0;;
+}
+
+{.mfi
+ // f7=0.5
+ setf.exp f7=r2
+ // a=x2+y2
+ fma.s1 f12=f10,f1,f11
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // x not NaN ?
+ (p6) fclass.m p7,p0=f8,0x3f
+ nop.i 0
+}
+{.mfi
+ // bias+2*emax-2 (0xffff+0xfc for single precision)
+ mov r2=0x100fb
+ // f6=2
+ fma.s1 f6=f1,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // y not NaN ?
+ (p9) fclass.m p8,p0=f9,0x3f
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p7,p8
+{.mfb
+ nop.m 0
+ // if f8=Infinity or f9=Zero, return |f8|
+ (p7) fmerge.s f8=f0,f14
+ (p7) br.ret.spnt b0
+}
+{.mfb
+ nop.m 0
+ // if f9=Infinity or f8=Zero, return |f9|
+ (p8) fmerge.s f8=f0,f15
+ (p8) br.ret.spnt b0;;
+}
+
+{ .mfi
+ nop.m 0
+// Identify Natvals, Infs, NaNs, and Zeros
+// and return result
+ fclass.m.unc p7, p0 = f12, 0x1E7
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // z0=frsqrta(a)
+ frsqrta.s1 f8,p6=f12
+ nop.i 0;;
+}
+
+{.mfb
+ // get exponent of x^2+y^2
+ getf.exp r3=f12
+ // if special case, set f8
+ (p7) mov f8=f12
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // S0=a*z0
+ (p6) fma.s1 f12=f12,f8,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H0=0.5*z0
+ (p6) fma.s1 f10=f8,f7,f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // f6=5/2
+ fma.s1 f6=f7,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f11=3/2
+ fma.s1 f11=f7,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // d=0.5-S0*H0
+ (p6) fnma.s1 f7=f12,f10,f7
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P01=d+1
+ (p6) fma.s1 f10=f1,f7,f1
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P23=5/2*d+3/2
+ (p6) fma.s1 f11=f6,f7,f11
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // d2=d*d
+ (p6) fma.s1 f7=f7,f7,f0
+ nop.i 0;;
+}
+
+
+{.mfi
+ // Is x^2 + y^2 well less than the overflow
+ // threshold?
+ (p6) cmp.lt.unc p7, p8 = r3,r2
+ // P=P01+d2*P23
+ (p6) fma.s1 f10=f7,f11,f10
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // S=P*S0
+ fma.s.s0 f8=f10,f12,f0
+ // No overflow in this case
+ (p7) br.ret.sptk b0;;
+}
+
+{ .mfi
+ nop.m 0
+(p8) fsetc.s2 0x7F,0x42
+ // Possible overflow path, must detect by
+ // Setting widest range exponent with prevailing
+ // rounding mode.
+ nop.i 0 ;;
+}
+
+
+{ .mfi
+ // bias+0x80 (bias+EMAX+1 for single precision)
+ (p8) mov r2=0x1007f
+ // S=P*S0, recomputed in s2 for the overflow test
+ (p8) fma.s.s2 f12=f10,f12,f0
+ nop.i 0 ;;
+}
+{ .mfi
+(p8) setf.exp f11 = r2
+(p8) fsetc.s2 0x7F,0x40
+// Restore Original Mode in S2
+ nop.i 0 ;;
+}
+{ .mfi
+ nop.m 0
+(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11 // p9 = s2 result is below 2^(EMAX+1)
+ nop.i 0 ;;
+}
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG = 47 // Error tag used if execution continues past the return below
+ // No overflow
+(p9) br.ret.sptk b0;;
+}
+GLOBAL_IEEE754_END(hypotf)
+libm_alias_float_other (__hypot, hypot)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // Error path: spill x, y and the result into a 64-byte frame, call __libm_error_support, reload f8
+{ .mii
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32
+ mov GR_Parameter_TAG = 47 // Error tag (same value hypotf sets before reaching this code)
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 on stack, then advance pointer by 16 to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_hypotl.S b/sysdeps/ia64/fpu/e_hypotl.S
new file mode 100644
index 0000000000..6d1a51e701
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_hypotl.S
@@ -0,0 +1,475 @@
+.file "hypotl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 02/02/00 hand-optimized
+// 04/04/00 Unwind support added
+// 06/20/00 new version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+//*********************************************************************
+//
+// Function: hypotl(x,y) = sqrt(x^2 + y^2) for double extended values
+// x and y
+// Also provides cabsl functionality.
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f9 (Input)
+// f6 -f15, f32-f34
+//
+// General Purpose Registers:
+// r2-r3 (Scratch)
+// r32-r35 (Locals)
+// r36-r39 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6 - p10
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// Overflow can occur.
+// hypotl(Infinity and anything) = +Infinity
+// hypotl(QNaN and anything) = QNaN
+// hypotl(SNaN and anything ) = QNaN
+//
+//*********************************************************************
+//
+// Implementation:
+// x2 = x * x in double-extended
+// y2 = y * y in double-extended
+// temp = x2 + y2 in double-extended
+// sqrt(temp) rounded to double extended
+//
+//*********************************************************************
+
+GR_SAVE_PFS = r33 // Holds ar.pfs while the error path makes its call
+GR_SAVE_B0 = r34 // Holds the return address b0 across that call
+GR_SAVE_GP = r35 // Holds gp across that call
+GR_Parameter_X = r36 // Address of the stored x argument
+GR_Parameter_Y = r37 // Address of the stored y argument
+GR_Parameter_RESULT = r38 // Address of the stored result
+GR_Parameter_TAG = r39 // Error tag (45 in this file)
+
+FR_X = f32 // Copy of x (made from f8 in hypotl)
+FR_Y = f33 // Copy of y (made from f9 in hypotl)
+FR_RESULT = f8 // Result register
+
+.section .text
+
+LOCAL_LIBM_ENTRY(cabsl)
+LOCAL_LIBM_END(cabsl)
+// cabsl above is an empty local entry placed directly in front of hypotl (header: Also provides cabsl functionality)
+GLOBAL_IEEE754_ENTRY(hypotl)
+{.mfi
+ alloc r32= ar.pfs,0,4,4,0 // 0 inputs, 4 locals (r32-r35), 4 outputs (r36-r39)
+ // Compute x*x
+ fma.s1 f10=f8,f8,f0
+ // r2=bias-1
+ mov r2=0xfffe
+}
+{.mfi
+ nop.m 0
+ // y*y
+ fma.s1 f11=f9,f9,f0
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 0
+// Check if x is an Inf - if so return Inf even
+// if y is a NaN (C9X)
+ fclass.m.unc p7, p6 = f8, 0x023
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // if possible overflow, copy f8 to f32
+ // set Denormal, if necessary
+ // (p8)
+ fma.s0 f32=f8,f1,f0
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+// Check if y is an Inf - if so return Inf even
+// if x is a NaN (C9X)
+ fclass.m.unc p8, p9 = f9, 0x023
+ nop.i 0
+}
+{ .mfi
+ nop.m 999 // NOTE(review): only nop in this function with a nonzero immediate - looks cosmetic, confirm
+// For x=inf, multiply y by 1 to raise invalid on y an SNaN
+// (p7) fma.s0 f9=f9,f1,f0
+ // copy f9 to f33; set Denormal, if necessary
+ fma.s0 f33=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is y Zero ?
+ (p6) fclass.m p6,p0=f9,0x7
+ nop.i 0;;
+}
+
+{.mfi
+ // f7=0.5
+ setf.exp f7=r2
+ // a=x2+y2
+ fma.s1 f12=f10,f1,f11
+ nop.i 0
+}
+{.mfi
+ mov r2=0x408c // high 16 bits of 0x408c0000, the single-precision pattern for 35/8
+ // dx=x*x-x2
+ fms.s1 f13=f8,f8,f10
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // is x Zero ?
+ (p9) fclass.m p9,p0=f8,0x7
+ shl r2=r2,16 // r2=0x408c0000
+}
+{.mfi
+ nop.m 0
+ // dy=y*y-y2
+ fms.s1 f14=f9,f9,f11
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // x not NaN ?
+ (p6) fclass.m p7,p0=f8,0x3f
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f6=2
+ fma.s1 f6=f1,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // f34=min(x2,y2)
+ famin.s1 f34=f10,f11
+ nop.i 0
+}
+{.mfb
+ nop.m 0
+ // f10=max(x2,y2)
+ famax.s1 f10=f11,f10
+ nop.b 0;; //
+}
+
+{.mfi
+ nop.m 0
+ // y not NaN ?
+ (p9) fclass.m p8,p0=f9,0x3f
+ nop.i 0;;
+}
+{.mfb
+ // f9=35/8 (y was already copied to f33 above)
+ setf.s f9=r2
+ // if f8=Infinity or f9=Zero, return |f8|
+ (p7) fmerge.s f8=f0,f32
+ (p7) br.ret.spnt b0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // z0=frsqrta(a)
+ frsqrta.s1 f8,p6=f12
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+// Identify Natvals, Infs, NaNs, and Zeros
+// and return result
+ fclass.m.unc p7, p0 = f12, 0x1E7
+ nop.i 0
+}
+{.mfi
+ // get exponent of x^2+y^2
+ getf.exp r3=f12
+ // dxy=dx+dy
+ fma.s1 f13=f13,f1,f14
+ nop.i 0;;
+}
+
+{.mfb
+ // bias+2*emax-2 (0xffff+0x7ffc for double extended)
+ mov r2=0x17ffb
+ // if f9=Infinity or f8=Zero, return |f9|
+ (p8) fmerge.s f8=f0,f33
+ (p8) br.ret.spnt b0
+}
+{.mfi
+ nop.m 0
+ // dd=a-max(x2,y2)
+ fnma.s1 f10=f10,f1,f12
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // S0=a*z0
+ (p6) fma.s1 f14=f12,f8,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H0=0.5*z0
+ (p6) fma.s1 f15=f8,f7,f0
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // if special case, set f8
+ (p7) mov f8=f12
+ (p7) br.ret.spnt b0
+}
+{.mfi
+ nop.m 0
+ // da=min(x2,y2)-dd
+ fnma.s1 f10=f10,f1,f34
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // f6=5/2
+ fma.s1 f6=f7,f1,f6
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // f11=3/2
+ fma.s1 f11=f7,f1,f1
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // d=0.5-S0*H0
+ (p6) fnma.s1 f7=f14,f15,f7
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P1=3/2*d+1
+ (p6) fma.s1 f11=f11,f7,f1
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P2=35/8*d+5/2
+ (p6) fma.s1 f9=f9,f7,f6
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // d2=d*d
+ (p6) fma.s1 f34=f7,f7,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // T0=d*S0
+ (p6) fma.s1 f6=f7,f14,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // G0=d*H0
+ (p6) fma.s1 f7=f7,f15,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P=d2*P2+P1
+ (p6) fma.s1 f11=f34,f9,f11
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // S1=p*T0+S0
+ (p6) fma.s1 f14=f11,f6,f14
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // H1=p*G0+H0
+ (p6) fma.s1 f15=f11,f7,f15
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // e1=a-S1*S1
+ (p6) fnma.s1 f7=f14,f14,f12
+ nop.i 0
+}
+{.mfi
+ // Is x^2 + y^2 well less than the overflow
+ // threshold?
+ (p6) cmp.lt.unc p7, p8 = r3,r2
+ // c=dxy+da
+ (p6) fma.s1 f13=f13,f1,f10
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // e=e1+c
+ (p6) fma.s1 f13=f7,f1,f13
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // S=e*H1+S1
+ fma.s0 f8=f13,f15,f14
+ // No overflow in this case
+ (p7) br.ret.sptk b0;;
+}
+
+{ .mfi
+ nop.m 0
+(p8) fsetc.s2 0x7F,0x42
+ // Possible overflow path, must detect by
+ // Setting widest range exponent with prevailing
+ // rounding mode.
+ nop.i 0 ;;
+}
+
+
+{ .mfi
+ // bias+0x4000 (bias+EMAX+1)
+ (p8) mov r2=0x13fff
+ // S=e*H1+S1, recomputed in s2 for the overflow test
+ (p8) fma.s2 f12=f13,f15,f14
+ nop.i 0 ;;
+}
+{ .mfi
+(p8) setf.exp f11 = r2
+(p8) fsetc.s2 0x7F,0x40
+// Restore Original Mode in S2
+ nop.i 0 ;;
+}
+{ .mfi
+ nop.m 0
+(p8) fcmp.lt.unc.s1 p9, p10 = f12, f11 // p9 = s2 result is below 2^(EMAX+1)
+ nop.i 0 ;;
+}
+{ .mib
+ nop.m 0
+ mov GR_Parameter_TAG = 45; // Error tag used if execution continues past the return below
+ // No overflow
+(p9) br.ret.sptk b0;;
+}
+GLOBAL_IEEE754_END(hypotl)
+libm_alias_ldouble_other (__hypot, hypot)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // Error path: spill x, y and the result into a 64-byte frame, call __libm_error_support, reload f8
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack, then advance pointer by 16 to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+LOCAL_LIBM_END(__libm_error_region#)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_ilogbl.S b/sysdeps/ia64/fpu/e_ilogbl.S
new file mode 100644
index 0000000000..68a17139fa
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_ilogbl.S
@@ -0,0 +1,267 @@
+.file "ilogbl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/03/00 Initial version
+// 05/26/00 Fix bug when x a double-extended denormal;
+// if x=0 call error routine, per C9X
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 01/20/01 Fixed result for x=0
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 01/20/03 Improved performance
+//
+// API
+//==============================================================
+// int ilogbl( long double x );
+//
+// Overview of operation
+//==============================================================
+// The ilogbl function extracts the exponent of x as an integer
+// and returns it in r8
+//
+// ilogbl is similar to logbl but differs in the following ways:
+// +-inf
+// ilogbl: returns INT_MAX
+// logbl: returns +inf
+// Nan returns FP_LOGBNAN (which is either INT_MAX or INT_MIN)
+// ilogbl: returns INT_MAX (7fffffff)
+// logbl: returns QNAN (quietized SNAN)
+// 0 returns FP_ILOGB0 (which is either INT_MIN or -INT_MAX)
+// ilogbl: returns INT_MIN (80000000)
+// logbl: returns -inf, raises the divide-by-zero exception,
+// and calls libm_error_support to set domain error
+//
+// Registers used
+//==============================================================
+// general registers used:
+// r26 -> r39
+// r36 -> r39 used as parameters to error path
+//
+// predicate registers used:
+// p6 -> p10
+// floating-point registers used:
+// f9, f10, f11
+// f8, input
+
+rExpBias = r26 // Exponent bias 0xffff (plus 64 when the input was scaled by 2^64)
+rExpMask = r27 // 0x1ffff, strips the sign bit from the getf.exp value
+rSignexp_x = r28 // Sign and biased exponent as read by getf.exp
+rExp_x = r29 // Biased exponent after masking
+rIntMax = r30 // 0x7fffffff, returned for natval, nan, inf
+rExp_2to64 = r31 // 0x1003f, signexp of 2^64
+
+GR_SAVE_PFS = r32 // Holds ar.pfs on the error path (also the alloc target in ILOGB_ZERO)
+rTrialResult = r33 // 0x80000000, the value returned in r8 for x = 0
+GR_SAVE_B0 = r34 // Holds the return address b0 across the error call
+GR_SAVE_GP = r35 // Holds gp across the error call
+
+GR_Parameter_X = r36 // Address of the stored x argument
+GR_Parameter_Y = r37 // Address of the stored second parameter slot
+GR_Parameter_RESULT = r38 // Address of the stored result slot
+GR_Parameter_TAG = r39 // Error tag (156 in this file)
+
+fTmp = f9 // Destination of the dummy fma; also stored as Parameter 3 on the error path
+fNorm_x = f10 // Normalized copy of x
+f2to64 = f11 // 2^64, used to scale double-extended denormals
+
+.section .text
+GLOBAL_LIBM_ENTRY(__ieee754_ilogbl)
+// Returns the unbiased exponent of f8 in r8; special inputs are handled on the paths below
+// X NORMAL
+// TrueExp_x = exp(f8) - 0xffff
+// r8 = TrueExp_x
+{ .mfi
+ getf.exp rSignexp_x = f8
+ fclass.m p8,p0 = f8, 0x0b // Test for x unorm
+ mov rExpBias = 0xffff // Exponent bias
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNorm_x = f8
+ mov rExpMask = 0x1ffff // Exponent mask
+}
+;;
+
+// Form signexp of 2^64 in case need to scale denormal
+{ .mfb
+ mov rExp_2to64 = 0x1003f
+ fclass.m p6,p9 = f8, 0x1e3 // Test x natval, nan, inf
+(p8) br.cond.spnt ILOGB_DENORM // Branch if x unorm
+}
+;;
+
+ILOGB_COMMON:
+// Return here from ILOGB_DENORM
+{ .mfi
+ and rExp_x = rSignexp_x, rExpMask // Get biased exponent
+ fclass.m p7,p10 = f8, 0x07 // Test x zero
+ nop.i 0
+}
+{ .mlx
+ nop.m 0
+ movl rIntMax = 0x000000007fffffff // Form INT_MAX
+}
+;;
+
+.pred.rel "mutex",p6,p9
+{ .mfi
+(p9) sub r8 = rExp_x, rExpBias // Get true exponent for normal path
+(p6) fma.s0 fTmp = f8, f8, f0 // Dummy to set Invalid flag
+(p6) mov r8 = rIntMax // If nan, inf, return INT_MAX
+}
+{ .mbb
+ nop.m 0
+(p7) br.cond.spnt ILOGB_ZERO // Branch if x zero
+(p10) br.ret.sptk b0 // Exit if x not zero
+}
+;;
+
+
+ILOGB_DENORM:
+// Form 2^64 in case need to scale denormal
+// Check to see if double-extended denormal
+{ .mfi
+ setf.exp f2to64 = rExp_2to64
+ fclass.m p8,p0 = fNorm_x, 0x0b // p8 = still unorm after fnorm
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p7,p0 = f8, f0 // Dummy op to set denormal flag
+ nop.i 0
+}
+;;
+
+// If double-extended denormal add 64 to exponent bias for scaling
+// If double-extended denormal form x * 2^64 which is normal
+{ .mfi
+(p8) add rExpBias = 64, rExpBias
+(p8) fmpy.s1 fNorm_x = fNorm_x, f2to64
+ nop.i 0
+}
+;;
+
+// Logic is the same as normal path but use normalized input
+{ .mib
+ getf.exp rSignexp_x = fNorm_x // Signexp of the normalized (possibly scaled) input
+ nop.i 0
+ br.cond.sptk ILOGB_COMMON // Return to main path
+}
+;;
+
+ILOGB_ZERO:
+// Here if x zero
+// Return INT_MIN, call error support
+
+{ .mlx
+ alloc r32=ar.pfs,1,3,4,0 // 1 input, 3 locals (r32-r35 in total), 4 outputs (r36-r39)
+ movl rTrialResult = 0x0000000080000000 // INT_MIN, moved to r8 by the error path
+}
+{ .mib
+ mov GR_Parameter_TAG = 156 // Error code
+ nop.i 0
+ br.cond.sptk __libm_error_region // Call error support
+}
+;;
+
+GLOBAL_LIBM_END(__ieee754_ilogbl)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// Error path for x = 0: spill f0, f8 and f9 into a 64-byte frame, call __libm_error_support, return rTrialResult in r8
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32, i.e. new sp+32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (f0) on stack, then advance pointer by 16 to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = f9 // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute result address (not read again in this routine)
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ mov r8 = rTrialResult // Return the preset value; the stored result slot is not reloaded
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_lgamma_r.c b/sysdeps/ia64/fpu/e_lgamma_r.c
new file mode 100644
index 0000000000..b80af25a4d
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_lgamma_r.c
@@ -0,0 +1,70 @@
+/* file: lgamma_r.c */
+
+
+// Copyright (c) 2002 Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//
+
+// History
+//==============================================================
+// 02/04/02: Initial version
+// 02/22/02: Removed lgammaf_r, gammaf_r
+/*
+// FUNCTIONS: double lgamma_r(double x, int* signgam)
+// double gamma_r(double x, int* signgam)
+// Natural logarithm of GAMMA function
+*/
+
+#include "libm_support.h"
+
+
+extern double __libm_lgamma(double /*x*/, int* /*signgam*/, int /*signgamsz*/);
+
+
+double __ieee754_lgamma_r(double x, int* signgam) /* thin wrapper: forwards to __libm_lgamma */
+{
+ return __libm_lgamma(x, signgam, sizeof(*signgam)); /* third argument: size in bytes of the int that signgam points to */
+}
+libm_alias_double_r (__ieee754_lgamma, lgamma, _r)
+
+#ifndef _LIBC /* gamma_r is compiled only when _LIBC is not defined */
+double __ieee754_gamma_r(double x, int* signgam) /* same forwarding call as __ieee754_lgamma_r */
+{
+ return __libm_lgamma(x, signgam, sizeof(*signgam)); /* third argument: size in bytes of the int that signgam points to */
+}
+weak_alias (__ieee754_gamma_r, gamma_r)
+#endif /* !_LIBC */
diff --git a/sysdeps/ia64/fpu/e_lgammaf_r.c b/sysdeps/ia64/fpu/e_lgammaf_r.c
new file mode 100644
index 0000000000..3e9aab0823
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_lgammaf_r.c
@@ -0,0 +1,70 @@
+/* file: lgammaf_r.c */
+
+
+// Copyright (c) 2002 Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//
+
+// History
+//==============================================================
+// 02/04/02: Initial version
+// 02/22/02: Removed lgamma_r, gamma_r
+/*
+// FUNCTIONS: float lgammaf_r(float x, int* signgam)
+// float gammaf_r(float x, int* signgam)
+// Natural logarithm of GAMMA function
+*/
+
+#include "libm_support.h"
+
+
+extern float __libm_lgammaf(float /*x*/, int* /*signgam*/, int /*signgamsz*/);
+
+
+float __ieee754_lgammaf_r(float x, int* signgam) /* thin wrapper: forwards to __libm_lgammaf */
+{
+ return __libm_lgammaf(x, signgam, sizeof(*signgam)); /* third argument: size in bytes of the int that signgam points to */
+}
+libm_alias_float_r (__ieee754_lgamma, lgamma, _r)
+
+#ifndef _LIBC /* gammaf_r is compiled only when _LIBC is not defined */
+float __ieee754_gammaf_r(float x, int* signgam) /* same forwarding call as __ieee754_lgammaf_r */
+{
+ return __libm_lgammaf(x, signgam, sizeof(*signgam)); /* third argument: size in bytes of the int that signgam points to */
+}
+weak_alias (__ieee754_gammaf_r, gammaf_r)
+#endif /* !_LIBC */
diff --git a/sysdeps/ia64/fpu/e_lgammal_r.c b/sysdeps/ia64/fpu/e_lgammal_r.c
new file mode 100644
index 0000000000..6467cb75fe
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_lgammal_r.c
@@ -0,0 +1,69 @@
+/* file: lgammal_r.c */
+
+
+// Copyright (c) 2002 Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//
+
+// History
+//==============================================================
+// 08/15/02: Initial version
+/*
+// FUNCTIONS: long double lgammal_r(long double x, int* signgam)
+// long double gammal_r(long double x, int* signgam)
+// Natural logarithm of GAMMA function
+*/
+
+#include "libm_support.h"
+
+
+/* long double worker defined elsewhere; third argument is the byte size of *signgam.  */
+extern long double __libm_lgammal(long double /*x*/, int* /*signgam*/, int /*signgamsz*/);
+
+long double __ieee754_lgammal_r(long double x, int* signgam)
+{
+ return __libm_lgammal(x, signgam, sizeof(*signgam));
+}
+libm_alias_ldouble_r (__ieee754_lgamma, lgamma, _r)
+
+#if !defined _LIBC
+/* Compiled only when _LIBC is not defined: same call, exported as gammal_r.  */
+long double __ieee754_gammal_r(long double x, int* signgam) {
+ return __libm_lgammal(x, signgam, (int) sizeof(*signgam));
+}
+weak_alias (__ieee754_gammal_r, gammal_r)
+#endif /* !_LIBC */
diff --git a/sysdeps/ia64/fpu/e_log.S b/sysdeps/ia64/fpu/e_log.S
new file mode 100644
index 0000000000..0275c7baec
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log.S
@@ -0,0 +1,1735 @@
+.file "log.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 06/16/00 Updated table to be rounded correctly
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 08/17/00 Improved speed of main path by 5 cycles
+// Shortened path for x=1.0
+// 01/09/01 Improved speed, fixed flags for neg denormals
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 05/23/02 Modified algorithm. Now only one polynomial is used
+// for |x-1| >= 1/256 and for |x-1| < 1/256
+// 12/11/02 Improved performance for Itanium 2
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//==============================================================
+// double log(double)
+// double log10(double)
+//
+//
+// Overview of operation
+//==============================================================
+// Background
+// ----------
+//
+// This algorithm is based on the fact that
+// log(a b) = log(a) + log(b).
+// In our case we have x = 2^N f, where 1 <= f < 2.
+// So
+// log(x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
+//
+// To calculate log(f) we do the following
+// log(f) = log(f * frcpa(f) / frcpa(f)) =
+// = log(f * frcpa(f)) + log(1/frcpa(f))
+//
+// By the definition of IA-64's frcpa instruction, frcpa(f) is a
+// floating-point value that approximates 1/f using a lookup on the
+// top 8 bits of the input number's significand, with relative
+// error < 2^(-8.886). So we have the following
+//
+// |(1/f - frcpa(f)) / (1/f)| = |1 - f*frcpa(f)| < 1/256
+//
+// and
+//
+// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
+// = log(1 + r) + T
+//
+// The first value can be computed by polynomial P(r) approximating
+// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
+// value defined by top 8 bit of f.
+//
+// Finally we have that log(x) ~ (N*log(2) + T) + P(r)
+//
+// Note that if input argument is close to 1.0 (in our case it means
+// that |1 - x| < 1/256) we can use just polynomial approximation
+// because x = 2^0 * f = f = 1 + r and
+// log(x) = log(1 + r) ~ P(r)
+//
+//
+// To compute log10(x) we use the simple identity
+//
+// log10(x) = log(x)/log(10)
+//
+// so we have that
+//
+// log10(x) = (N*log(2) + T + log(1+r)) / log(10) =
+// = N*(log(2)/log(10)) + (T/log(10)) + log(1 + r)/log(10)
+//
+//
+// Implementation
+// --------------
+// It can be seen that the formulas for log and log10 differ from one another
+// only by coefficients and tabular values. Namely, both log and log10 are
+// calculated as (N*L1 + T) + L2*Series(r) where in case of log
+// L1 = log(2)
+// T = log(1/frcpa(x))
+// L2 = 1.0
+// and in case of log10
+// L1 = log(2)/log(10)
+// T = log(1/frcpa(x))/log(10)
+// L2 = 1.0/log(10)
+//
+// So the code is shared, with two different entry points that set pointers
+// to the base address of the corresponding data set containing the values
+// of L2,T and prepare the integer representation of L1 needed for the following
+// setf instruction.
+//
+// Note that both log and log10 use a common approximation polynomial,
+// which means we need only one set of approximation coefficients.
+//
+//
+// 1. |x-1| >= 1/256
+// InvX = frcpa(x)
+// r = InvX*x - 1
+// P(r) = r + r^2*(r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
+// all coefficients are calculated in quad and rounded to double
+// precision. A7,A6,A5,A4 are stored in memory whereas A3 and A2
+// created with setf.
+//
+// N = float(n) where n is true unbiased exponent of x
+//
+// T is tabular value of log(1/frcpa(x)) calculated in quad precision
+// and represented by two floating-point numbers 64-bit Thi and 32-bit Tlo.
+// To load Thi,Tlo we get bits from 55 to 62 of register format significand
+// as index and calculate two addresses
+// ad_Thi = Thi_table_base_addr + 8 * index
+// ad_Tlo = Tlo_table_base_addr + 4 * index
+//
+// L2 (1.0 or 1.0/log(10) depending on function) is calculated in quad
+// precision and rounded to double extended; it's loaded from memory.
+//
+// L1 (log(2) or log10(2) depending on function) is calculated in quad
+// precision and represented by two floating-point 64-bit numbers L1hi,L1lo
+// stored in memory.
+//
+// And final result = ((L1hi*N + Thi) + (N*L1lo + Tlo)) + L2*P(r)
+//
+//
+// 2. |x-1| < 1/256
+// r = x - 1
+// P(r) = r + r^2*(r*A3 - A2) + r^4*((A4 + r*A5) + r^2*(A6 + r*A7)),
+// A7,A6,A5,A4,A3,A2 are the same as in case |x-1| >= 1/256
+//
+// And final results
+// log(x) = P(r)
+// log10(x) = L2*P(r)
+//
+// 3. How we determine whether the input argument satisfies |x-1| < 1/256.
+//
+// To do so we analyze the biased exponent and the integer representation
+// of the input argument
+//
+// a) First we test whether the biased exponent equals 0xFFFE or 0xFFFF (i.e.
+// whether 0.5 <= x < 2). This comparison can be performed using the
+// unsigned version of the cmp instruction in the following way
+// biased_exponent_of_x - 0xFFFE < 2
+//
+//
+// b) Second (when the result of a) is true) we need to compare x
+// with 1-1/256 and 1+1/256 or, in double precision memory representation,
+// with 0x3FEFE00000000000 and 0x3FF0100000000000 respectively.
+// This comparison can be made like in a), using unsigned
+// version of cmp i.e. ix - 0x3FEFE00000000000 < 0x0000300000000000.
+// 0x0000300000000000 is difference between 0x3FF0100000000000 and
+// 0x3FEFE00000000000
+//
+// Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
+// filtered and processed on special branches.
+//
+
+//
+// Special values
+//==============================================================
+//
+// log(+0) = -inf
+// log(-0) = -inf
+//
+// log(+qnan) = +qnan
+// log(-qnan) = -qnan
+// log(+snan) = +qnan
+// log(-snan) = -qnan
+//
+// log(-n) = QNAN Indefinite
+// log(-inf) = QNAN Indefinite
+//
+// log(+inf) = +inf
+//
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f7 -> f15, f32 -> f42
+//
+// General registers used:
+// r8 -> r11
+// r14 -> r23
+//
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros: symbolic names for the registers listed under "Registers used"
+//==============================================================
+GR_TAG = r8 // shares r8 with GR_ad_1
+GR_ad_1 = r8 // shares r8 with GR_TAG
+GR_ad_2 = r9
+GR_Exp = r10
+GR_N = r11
+// General registers r14-r23; names mapped to the same register are marked
+GR_x = r14
+GR_dx = r15 // shares r15 with GR_NearOne
+GR_NearOne = r15 // shares r15 with GR_dx
+GR_xorg = r16 // shares r16 with GR_mask
+GR_mask = r16 // shares r16 with GR_xorg
+GR_05 = r17
+GR_A3 = r18
+GR_Sig = r19 // shares r19 with GR_Ind
+GR_Ind = r19 // shares r19 with GR_Sig
+GR_Nm1 = r20
+GR_bias = r21
+GR_ad_3 = r22
+GR_rexp = r23
+
+// r33-r36: by their names, save slots for b0/ar.pfs/gp/sp -- TODO confirm at the call site
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+// r37-r40: by their names, arguments of the error-support call -- TODO confirm at the call site
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+// Floating-point working registers (f7, f9-f15)
+FR_NormX = f7
+FR_RcpX = f9 // shares f9 with FR_tmp
+FR_tmp = f9 // shares f9 with FR_RcpX
+FR_r = f10 // also named FR_X further down
+FR_r2 = f11
+FR_r4 = f12
+FR_N = f13
+FR_Ln2hi = f14
+FR_Ln2lo = f15
+// f32-f37: names for the polynomial coefficients A7..A2
+FR_A7 = f32
+FR_A6 = f33
+FR_A5 = f34
+FR_A4 = f35
+FR_A3 = f36
+FR_A2 = f37
+// f38 and f39 each carry several names
+FR_Thi = f38
+FR_NxLn2hipThi = f38
+FR_NxLn2pT = f38
+FR_Tlo = f39
+FR_NxLn2lopTlo = f39
+
+FR_InvLn10 = f40
+FR_A32 = f41
+FR_A321 = f42
+
+// FR_X is f10 (also FR_r); FR_RESULT is f8, the input register per the header above
+FR_Y = f1
+FR_X = f10
+FR_RESULT = f8
+
+
+// Data
+//==============================================================
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(log_data)
+// coefficients of polynomial approximation
+data8 0x3FC2494104381A8E // A7
+data8 0xBFC5556D556BBB69 // A6
+//
+// two parts of ln(2)
+data8 0x3FE62E42FEF00000,0x3DD473DE6AF278ED
+//
+data8 0x8000000000000000,0x3FFF // 1.0
+//
+data8 0x3FC999999988B5E9 // A5
+data8 0xBFCFFFFFFFF6FFF5 // A4
+//
+// hi parts of ln(1/frcpa(1+i/256)), i=0...255
+data8 0x3F60040155D5889D // 0
+data8 0x3F78121214586B54 // 1
+data8 0x3F841929F96832EF // 2
+data8 0x3F8C317384C75F06 // 3
+data8 0x3F91A6B91AC73386 // 4
+data8 0x3F95BA9A5D9AC039 // 5
+data8 0x3F99D2A8074325F3 // 6
+data8 0x3F9D6B2725979802 // 7
+data8 0x3FA0C58FA19DFAA9 // 8
+data8 0x3FA2954C78CBCE1A // 9
+data8 0x3FA4A94D2DA96C56 // 10
+data8 0x3FA67C94F2D4BB58 // 11
+data8 0x3FA85188B630F068 // 12
+data8 0x3FAA6B8ABE73AF4C // 13
+data8 0x3FAC441E06F72A9E // 14
+data8 0x3FAE1E6713606D06 // 15
+data8 0x3FAFFA6911AB9300 // 16
+data8 0x3FB0EC139C5DA600 // 17
+data8 0x3FB1DBD2643D190B // 18
+data8 0x3FB2CC7284FE5F1C // 19
+data8 0x3FB3BDF5A7D1EE64 // 20
+data8 0x3FB4B05D7AA012E0 // 21
+data8 0x3FB580DB7CEB5701 // 22
+data8 0x3FB674F089365A79 // 23
+data8 0x3FB769EF2C6B568D // 24
+data8 0x3FB85FD927506A47 // 25
+data8 0x3FB9335E5D594988 // 26
+data8 0x3FBA2B0220C8E5F4 // 27
+data8 0x3FBB0004AC1A86AB // 28
+data8 0x3FBBF968769FCA10 // 29
+data8 0x3FBCCFEDBFEE13A8 // 30
+data8 0x3FBDA727638446A2 // 31
+data8 0x3FBEA3257FE10F79 // 32
+data8 0x3FBF7BE9FEDBFDE5 // 33
+data8 0x3FC02AB352FF25F3 // 34
+data8 0x3FC097CE579D204C // 35
+data8 0x3FC1178E8227E47B // 36
+data8 0x3FC185747DBECF33 // 37
+data8 0x3FC1F3B925F25D41 // 38
+data8 0x3FC2625D1E6DDF56 // 39
+data8 0x3FC2D1610C868139 // 40
+data8 0x3FC340C59741142E // 41
+data8 0x3FC3B08B6757F2A9 // 42
+data8 0x3FC40DFB08378003 // 43
+data8 0x3FC47E74E8CA5F7C // 44
+data8 0x3FC4EF51F6466DE4 // 45
+data8 0x3FC56092E02BA516 // 46
+data8 0x3FC5D23857CD74D4 // 47
+data8 0x3FC6313A37335D76 // 48
+data8 0x3FC6A399DABBD383 // 49
+data8 0x3FC70337DD3CE41A // 50
+data8 0x3FC77654128F6127 // 51
+data8 0x3FC7E9D82A0B022D // 52
+data8 0x3FC84A6B759F512E // 53
+data8 0x3FC8AB47D5F5A30F // 54
+data8 0x3FC91FE49096581B // 55
+data8 0x3FC981634011AA75 // 56
+data8 0x3FC9F6C407089664 // 57
+data8 0x3FCA58E729348F43 // 58
+data8 0x3FCABB55C31693AC // 59
+data8 0x3FCB1E104919EFD0 // 60
+data8 0x3FCB94EE93E367CA // 61
+data8 0x3FCBF851C067555E // 62
+data8 0x3FCC5C0254BF23A5 // 63
+data8 0x3FCCC000C9DB3C52 // 64
+data8 0x3FCD244D99C85673 // 65
+data8 0x3FCD88E93FB2F450 // 66
+data8 0x3FCDEDD437EAEF00 // 67
+data8 0x3FCE530EFFE71012 // 68
+data8 0x3FCEB89A1648B971 // 69
+data8 0x3FCF1E75FADF9BDE // 70
+data8 0x3FCF84A32EAD7C35 // 71
+data8 0x3FCFEB2233EA07CD // 72
+data8 0x3FD028F9C7035C1C // 73
+data8 0x3FD05C8BE0D9635A // 74
+data8 0x3FD085EB8F8AE797 // 75
+data8 0x3FD0B9C8E32D1911 // 76
+data8 0x3FD0EDD060B78080 // 77
+data8 0x3FD122024CF0063F // 78
+data8 0x3FD14BE2927AECD4 // 79
+data8 0x3FD180618EF18ADF // 80
+data8 0x3FD1B50BBE2FC63B // 81
+data8 0x3FD1DF4CC7CF242D // 82
+data8 0x3FD214456D0EB8D4 // 83
+data8 0x3FD23EC5991EBA49 // 84
+data8 0x3FD2740D9F870AFB // 85
+data8 0x3FD29ECDABCDFA03 // 86
+data8 0x3FD2D46602ADCCEE // 87
+data8 0x3FD2FF66B04EA9D4 // 88
+data8 0x3FD335504B355A37 // 89
+data8 0x3FD360925EC44F5C // 90
+data8 0x3FD38BF1C3337E74 // 91
+data8 0x3FD3C25277333183 // 92
+data8 0x3FD3EDF463C1683E // 93
+data8 0x3FD419B423D5E8C7 // 94
+data8 0x3FD44591E0539F48 // 95
+data8 0x3FD47C9175B6F0AD // 96
+data8 0x3FD4A8B341552B09 // 97
+data8 0x3FD4D4F39089019F // 98
+data8 0x3FD501528DA1F967 // 99
+data8 0x3FD52DD06347D4F6 // 100
+data8 0x3FD55A6D3C7B8A89 // 101
+data8 0x3FD5925D2B112A59 // 102
+data8 0x3FD5BF406B543DB1 // 103
+data8 0x3FD5EC433D5C35AD // 104
+data8 0x3FD61965CDB02C1E // 105
+data8 0x3FD646A84935B2A1 // 106
+data8 0x3FD6740ADD31DE94 // 107
+data8 0x3FD6A18DB74A58C5 // 108
+data8 0x3FD6CF31058670EC // 109
+data8 0x3FD6F180E852F0B9 // 110
+data8 0x3FD71F5D71B894EF // 111
+data8 0x3FD74D5AEFD66D5C // 112
+data8 0x3FD77B79922BD37D // 113
+data8 0x3FD7A9B9889F19E2 // 114
+data8 0x3FD7D81B037EB6A6 // 115
+data8 0x3FD8069E33827230 // 116
+data8 0x3FD82996D3EF8BCA // 117
+data8 0x3FD85855776DCBFA // 118
+data8 0x3FD8873658327CCE // 119
+data8 0x3FD8AA75973AB8CE // 120
+data8 0x3FD8D992DC8824E4 // 121
+data8 0x3FD908D2EA7D9511 // 122
+data8 0x3FD92C59E79C0E56 // 123
+data8 0x3FD95BD750EE3ED2 // 124
+data8 0x3FD98B7811A3EE5B // 125
+data8 0x3FD9AF47F33D406B // 126
+data8 0x3FD9DF270C1914A7 // 127
+data8 0x3FDA0325ED14FDA4 // 128
+data8 0x3FDA33440224FA78 // 129
+data8 0x3FDA57725E80C382 // 130
+data8 0x3FDA87D0165DD199 // 131
+data8 0x3FDAAC2E6C03F895 // 132
+data8 0x3FDADCCC6FDF6A81 // 133
+data8 0x3FDB015B3EB1E790 // 134
+data8 0x3FDB323A3A635948 // 135
+data8 0x3FDB56FA04462909 // 136
+data8 0x3FDB881AA659BC93 // 137
+data8 0x3FDBAD0BEF3DB164 // 138
+data8 0x3FDBD21297781C2F // 139
+data8 0x3FDC039236F08818 // 140
+data8 0x3FDC28CB1E4D32FC // 141
+data8 0x3FDC4E19B84723C1 // 142
+data8 0x3FDC7FF9C74554C9 // 143
+data8 0x3FDCA57B64E9DB05 // 144
+data8 0x3FDCCB130A5CEBAF // 145
+data8 0x3FDCF0C0D18F326F // 146
+data8 0x3FDD232075B5A201 // 147
+data8 0x3FDD490246DEFA6B // 148
+data8 0x3FDD6EFA918D25CD // 149
+data8 0x3FDD9509707AE52F // 150
+data8 0x3FDDBB2EFE92C554 // 151
+data8 0x3FDDEE2F3445E4AE // 152
+data8 0x3FDE148A1A2726CD // 153
+data8 0x3FDE3AFC0A49FF3F // 154
+data8 0x3FDE6185206D516D // 155
+data8 0x3FDE882578823D51 // 156
+data8 0x3FDEAEDD2EAC990C // 157
+data8 0x3FDED5AC5F436BE2 // 158
+data8 0x3FDEFC9326D16AB8 // 159
+data8 0x3FDF2391A21575FF // 160
+data8 0x3FDF4AA7EE03192C // 161
+data8 0x3FDF71D627C30BB0 // 162
+data8 0x3FDF991C6CB3B379 // 163
+data8 0x3FDFC07ADA69A90F // 164
+data8 0x3FDFE7F18EB03D3E // 165
+data8 0x3FE007C053C5002E // 166
+data8 0x3FE01B942198A5A0 // 167
+data8 0x3FE02F74400C64EA // 168
+data8 0x3FE04360BE7603AC // 169
+data8 0x3FE05759AC47FE33 // 170
+data8 0x3FE06B5F1911CF51 // 171
+data8 0x3FE078BF0533C568 // 172
+data8 0x3FE08CD9687E7B0E // 173
+data8 0x3FE0A10074CF9019 // 174
+data8 0x3FE0B5343A234476 // 175
+data8 0x3FE0C974C89431CD // 176
+data8 0x3FE0DDC2305B9886 // 177
+data8 0x3FE0EB524BAFC918 // 178
+data8 0x3FE0FFB54213A475 // 179
+data8 0x3FE114253DA97D9F // 180
+data8 0x3FE128A24F1D9AFF // 181
+data8 0x3FE1365252BF0864 // 182
+data8 0x3FE14AE558B4A92D // 183
+data8 0x3FE15F85A19C765B // 184
+data8 0x3FE16D4D38C119FA // 185
+data8 0x3FE18203C20DD133 // 186
+data8 0x3FE196C7BC4B1F3A // 187
+data8 0x3FE1A4A738B7A33C // 188
+data8 0x3FE1B981C0C9653C // 189
+data8 0x3FE1CE69E8BB106A // 190
+data8 0x3FE1DC619DE06944 // 191
+data8 0x3FE1F160A2AD0DA3 // 192
+data8 0x3FE2066D7740737E // 193
+data8 0x3FE2147DBA47A393 // 194
+data8 0x3FE229A1BC5EBAC3 // 195
+data8 0x3FE237C1841A502E // 196
+data8 0x3FE24CFCE6F80D9A // 197
+data8 0x3FE25B2C55CD5762 // 198
+data8 0x3FE2707F4D5F7C40 // 199
+data8 0x3FE285E0842CA383 // 200
+data8 0x3FE294294708B773 // 201
+data8 0x3FE2A9A2670AFF0C // 202
+data8 0x3FE2B7FB2C8D1CC0 // 203
+data8 0x3FE2C65A6395F5F5 // 204
+data8 0x3FE2DBF557B0DF42 // 205
+data8 0x3FE2EA64C3F97654 // 206
+data8 0x3FE3001823684D73 // 207
+data8 0x3FE30E97E9A8B5CC // 208
+data8 0x3FE32463EBDD34E9 // 209
+data8 0x3FE332F4314AD795 // 210
+data8 0x3FE348D90E7464CF // 211
+data8 0x3FE35779F8C43D6D // 212
+data8 0x3FE36621961A6A99 // 213
+data8 0x3FE37C299F3C366A // 214
+data8 0x3FE38AE2171976E7 // 215
+data8 0x3FE399A157A603E7 // 216
+data8 0x3FE3AFCCFE77B9D1 // 217
+data8 0x3FE3BE9D503533B5 // 218
+data8 0x3FE3CD7480B4A8A2 // 219
+data8 0x3FE3E3C43918F76C // 220
+data8 0x3FE3F2ACB27ED6C6 // 221
+data8 0x3FE4019C2125CA93 // 222
+data8 0x3FE4181061389722 // 223
+data8 0x3FE42711518DF545 // 224
+data8 0x3FE436194E12B6BF // 225
+data8 0x3FE445285D68EA69 // 226
+data8 0x3FE45BCC464C893A // 227
+data8 0x3FE46AED21F117FC // 228
+data8 0x3FE47A1527E8A2D3 // 229
+data8 0x3FE489445EFFFCCB // 230
+data8 0x3FE4A018BCB69835 // 231
+data8 0x3FE4AF5A0C9D65D7 // 232
+data8 0x3FE4BEA2A5BDBE87 // 233
+data8 0x3FE4CDF28F10AC46 // 234
+data8 0x3FE4DD49CF994058 // 235
+data8 0x3FE4ECA86E64A683 // 236
+data8 0x3FE503C43CD8EB68 // 237
+data8 0x3FE513356667FC57 // 238
+data8 0x3FE522AE0738A3D7 // 239
+data8 0x3FE5322E26867857 // 240
+data8 0x3FE541B5CB979809 // 241
+data8 0x3FE55144FDBCBD62 // 242
+data8 0x3FE560DBC45153C6 // 243
+data8 0x3FE5707A26BB8C66 // 244
+data8 0x3FE587F60ED5B8FF // 245
+data8 0x3FE597A7977C8F31 // 246
+data8 0x3FE5A760D634BB8A // 247
+data8 0x3FE5B721D295F10E // 248
+data8 0x3FE5C6EA94431EF9 // 249
+data8 0x3FE5D6BB22EA86F5 // 250
+data8 0x3FE5E6938645D38F // 251
+data8 0x3FE5F673C61A2ED1 // 252
+data8 0x3FE6065BEA385926 // 253
+data8 0x3FE6164BFA7CC06B // 254
+data8 0x3FE62643FECF9742 // 255
+//
+// lo parts of ln(1/frcpa(1+i/256)), i=0...255
+data4 0x20E70672 // 0
+data4 0x1F60A5D0 // 1
+data4 0x218EABA0 // 2
+data4 0x21403104 // 3
+data4 0x20E9B54E // 4
+data4 0x21EE1382 // 5
+data4 0x226014E3 // 6
+data4 0x2095E5C9 // 7
+data4 0x228BA9D4 // 8
+data4 0x22932B86 // 9
+data4 0x22608A57 // 10
+data4 0x220209F3 // 11
+data4 0x212882CC // 12
+data4 0x220D46E2 // 13
+data4 0x21FA4C28 // 14
+data4 0x229E5BD9 // 15
+data4 0x228C9838 // 16
+data4 0x2311F954 // 17
+data4 0x221365DF // 18
+data4 0x22BD0CB3 // 19
+data4 0x223D4BB7 // 20
+data4 0x22A71BBE // 21
+data4 0x237DB2FA // 22
+data4 0x23194C9D // 23
+data4 0x22EC639E // 24
+data4 0x2367E669 // 25
+data4 0x232E1D5F // 26
+data4 0x234A639B // 27
+data4 0x2365C0E0 // 28
+data4 0x234646C1 // 29
+data4 0x220CBF9C // 30
+data4 0x22A00FD4 // 31
+data4 0x2306A3F2 // 32
+data4 0x23745A9B // 33
+data4 0x2398D756 // 34
+data4 0x23DD0B6A // 35
+data4 0x23DE338B // 36
+data4 0x23A222DF // 37
+data4 0x223164F8 // 38
+data4 0x23B4E87B // 39
+data4 0x23D6CCB8 // 40
+data4 0x220C2099 // 41
+data4 0x21B86B67 // 42
+data4 0x236D14F1 // 43
+data4 0x225A923F // 44
+data4 0x22748723 // 45
+data4 0x22200D13 // 46
+data4 0x23C296EA // 47
+data4 0x2302AC38 // 48
+data4 0x234B1996 // 49
+data4 0x2385E298 // 50
+data4 0x23175BE5 // 51
+data4 0x2193F482 // 52
+data4 0x23BFEA90 // 53
+data4 0x23D70A0C // 54
+data4 0x231CF30A // 55
+data4 0x235D9E90 // 56
+data4 0x221AD0CB // 57
+data4 0x22FAA08B // 58
+data4 0x23D29A87 // 59
+data4 0x20C4B2FE // 60
+data4 0x2381B8B7 // 61
+data4 0x23F8D9FC // 62
+data4 0x23EAAE7B // 63
+data4 0x2329E8AA // 64
+data4 0x23EC0322 // 65
+data4 0x2357FDCB // 66
+data4 0x2392A9AD // 67
+data4 0x22113B02 // 68
+data4 0x22DEE901 // 69
+data4 0x236A6D14 // 70
+data4 0x2371D33E // 71
+data4 0x2146F005 // 72
+data4 0x23230B06 // 73
+data4 0x22F1C77D // 74
+data4 0x23A89FA3 // 75
+data4 0x231D1241 // 76
+data4 0x244DA96C // 77
+data4 0x23ECBB7D // 78
+data4 0x223E42B4 // 79
+data4 0x23801BC9 // 80
+data4 0x23573263 // 81
+data4 0x227C1158 // 82
+data4 0x237BD749 // 83
+data4 0x21DDBAE9 // 84
+data4 0x23401735 // 85
+data4 0x241D9DEE // 86
+data4 0x23BC88CB // 87
+data4 0x2396D5F1 // 88
+data4 0x23FC89CF // 89
+data4 0x2414F9A2 // 90
+data4 0x2474A0F5 // 91
+data4 0x24354B60 // 92
+data4 0x23C1EB40 // 93
+data4 0x2306DD92 // 94
+data4 0x24353B6B // 95
+data4 0x23CD1701 // 96
+data4 0x237C7A1C // 97
+data4 0x245793AA // 98
+data4 0x24563695 // 99
+data4 0x23C51467 // 100
+data4 0x24476B68 // 101
+data4 0x212585A9 // 102
+data4 0x247B8293 // 103
+data4 0x2446848A // 104
+data4 0x246A53F8 // 105
+data4 0x246E496D // 106
+data4 0x23ED1D36 // 107
+data4 0x2314C258 // 108
+data4 0x233244A7 // 109
+data4 0x245B7AF0 // 110
+data4 0x24247130 // 111
+data4 0x22D67B38 // 112
+data4 0x2449F620 // 113
+data4 0x23BBC8B8 // 114
+data4 0x237D3BA0 // 115
+data4 0x245E8F13 // 116
+data4 0x2435573F // 117
+data4 0x242DE666 // 118
+data4 0x2463BC10 // 119
+data4 0x2466587D // 120
+data4 0x2408144B // 121
+data4 0x2405F0E5 // 122
+data4 0x22381CFF // 123
+data4 0x24154F9B // 124
+data4 0x23A4E96E // 125
+data4 0x24052967 // 126
+data4 0x2406963F // 127
+data4 0x23F7D3CB // 128
+data4 0x2448AFF4 // 129
+data4 0x24657A21 // 130
+data4 0x22FBC230 // 131
+data4 0x243C8DEA // 132
+data4 0x225DC4B7 // 133
+data4 0x23496EBF // 134
+data4 0x237C2B2B // 135
+data4 0x23A4A5B1 // 136
+data4 0x2394E9D1 // 137
+data4 0x244BC950 // 138
+data4 0x23C7448F // 139
+data4 0x2404A1AD // 140
+data4 0x246511D5 // 141
+data4 0x24246526 // 142
+data4 0x23111F57 // 143
+data4 0x22868951 // 144
+data4 0x243EB77F // 145
+data4 0x239F3DFF // 146
+data4 0x23089666 // 147
+data4 0x23EBFA6A // 148
+data4 0x23C51312 // 149
+data4 0x23E1DD5E // 150
+data4 0x232C0944 // 151
+data4 0x246A741F // 152
+data4 0x2414DF8D // 153
+data4 0x247B5546 // 154
+data4 0x2415C980 // 155
+data4 0x24324ABD // 156
+data4 0x234EB5E5 // 157
+data4 0x2465E43E // 158
+data4 0x242840D1 // 159
+data4 0x24444057 // 160
+data4 0x245E56F0 // 161
+data4 0x21AE30F8 // 162
+data4 0x23FB3283 // 163
+data4 0x247A4D07 // 164
+data4 0x22AE314D // 165
+data4 0x246B7727 // 166
+data4 0x24EAD526 // 167
+data4 0x24B41DC9 // 168
+data4 0x24EE8062 // 169
+data4 0x24A0C7C4 // 170
+data4 0x24E8DA67 // 171
+data4 0x231120F7 // 172
+data4 0x24401FFB // 173
+data4 0x2412DD09 // 174
+data4 0x248C131A // 175
+data4 0x24C0A7CE // 176
+data4 0x243DD4C8 // 177
+data4 0x24457FEB // 178
+data4 0x24DEEFBB // 179
+data4 0x243C70AE // 180
+data4 0x23E7A6FA // 181
+data4 0x24C2D311 // 182
+data4 0x23026255 // 183
+data4 0x2437C9B9 // 184
+data4 0x246BA847 // 185
+data4 0x2420B448 // 186
+data4 0x24C4CF5A // 187
+data4 0x242C4981 // 188
+data4 0x24DE1525 // 189
+data4 0x24F5CC33 // 190
+data4 0x235A85DA // 191
+data4 0x24A0B64F // 192
+data4 0x244BA0A4 // 193
+data4 0x24AAF30A // 194
+data4 0x244C86F9 // 195
+data4 0x246D5B82 // 196
+data4 0x24529347 // 197
+data4 0x240DD008 // 198
+data4 0x24E98790 // 199
+data4 0x2489B0CE // 200
+data4 0x22BC29AC // 201
+data4 0x23F37C7A // 202
+data4 0x24987FE8 // 203
+data4 0x22AFE20B // 204
+data4 0x24C8D7C2 // 205
+data4 0x24B28B7D // 206
+data4 0x23B6B271 // 207
+data4 0x24C77CB6 // 208
+data4 0x24EF1DCA // 209
+data4 0x24A4F0AC // 210
+data4 0x24CF113E // 211
+data4 0x2496BBAB // 212
+data4 0x23C7CC8A // 213
+data4 0x23AE3961 // 214
+data4 0x2410A895 // 215
+data4 0x23CE3114 // 216
+data4 0x2308247D // 217
+data4 0x240045E9 // 218
+data4 0x24974F60 // 219
+data4 0x242CB39F // 220
+data4 0x24AB8D69 // 221
+data4 0x23436788 // 222
+data4 0x24305E9E // 223
+data4 0x243E71A9 // 224
+data4 0x23C2A6B3 // 225
+data4 0x23FFE6CF // 226
+data4 0x2322D801 // 227
+data4 0x24515F21 // 228
+data4 0x2412A0D6 // 229
+data4 0x24E60D44 // 230
+data4 0x240D9251 // 231
+data4 0x247076E2 // 232
+data4 0x229B101B // 233
+data4 0x247B12DE // 234
+data4 0x244B9127 // 235
+data4 0x2499EC42 // 236
+data4 0x21FC3963 // 237
+data4 0x23E53266 // 238
+data4 0x24CE102D // 239
+data4 0x23CC45D2 // 240
+data4 0x2333171D // 241
+data4 0x246B3533 // 242
+data4 0x24931129 // 243
+data4 0x24405FFA // 244
+data4 0x24CF464D // 245
+data4 0x237095CD // 246
+data4 0x24F86CBD // 247
+data4 0x24E2D84B // 248
+data4 0x21ACBB44 // 249
+data4 0x24F43A8C // 250
+data4 0x249DB931 // 251
+data4 0x24A385EF // 252
+data4 0x238B1279 // 253
+data4 0x2436213E // 254
+data4 0x24F18A3B // 255
+LOCAL_OBJECT_END(log_data)
+
+
+LOCAL_OBJECT_START(log10_data)
+// coefficients of polynomial approximation
+data8 0x3FC2494104381A8E // A7
+data8 0xBFC5556D556BBB69 // A6
+//
+// two parts of ln(2)/ln(10)
+data8 0x3FD3441350900000, 0x3DCEF3FDE623E256
+//
+data8 0xDE5BD8A937287195,0x3FFD // 1/ln(10)
+//
+data8 0x3FC999999988B5E9 // A5
+data8 0xBFCFFFFFFFF6FFF5 // A4
+//
+// Hi parts of ln(1/frcpa(1+i/256))/ln(10), i=0...255
+data8 0x3F4BD27045BFD024 // 0
+data8 0x3F64E84E793A474A // 1
+data8 0x3F7175085AB85FF0 // 2
+data8 0x3F787CFF9D9147A5 // 3
+data8 0x3F7EA9D372B89FC8 // 4
+data8 0x3F82DF9D95DA961C // 5
+data8 0x3F866DF172D6372B // 6
+data8 0x3F898D79EF5EEDEF // 7
+data8 0x3F8D22ADF3F9579C // 8
+data8 0x3F9024231D30C398 // 9
+data8 0x3F91F23A98897D49 // 10
+data8 0x3F93881A7B818F9E // 11
+data8 0x3F951F6E1E759E35 // 12
+data8 0x3F96F2BCE7ADC5B4 // 13
+data8 0x3F988D362CDF359E // 14
+data8 0x3F9A292BAF010981 // 15
+data8 0x3F9BC6A03117EB97 // 16
+data8 0x3F9D65967DE3AB08 // 17
+data8 0x3F9F061167FC31E7 // 18
+data8 0x3FA05409E4F7819B // 19
+data8 0x3FA125D0432EA20D // 20
+data8 0x3FA1F85D440D299B // 21
+data8 0x3FA2AD755749617C // 22
+data8 0x3FA381772A00E603 // 23
+data8 0x3FA45643E165A70A // 24
+data8 0x3FA52BDD034475B8 // 25
+data8 0x3FA5E3966B7E9295 // 26
+data8 0x3FA6BAAF47C5B244 // 27
+data8 0x3FA773B3E8C4F3C7 // 28
+data8 0x3FA84C51EBEE8D15 // 29
+data8 0x3FA906A6786FC1CA // 30
+data8 0x3FA9C197ABF00DD6 // 31
+data8 0x3FAA9C78712191F7 // 32
+data8 0x3FAB58C09C8D637C // 33
+data8 0x3FAC15A8BCDD7B7E // 34
+data8 0x3FACD331E2C2967B // 35
+data8 0x3FADB11ED766ABF4 // 36
+data8 0x3FAE70089346A9E6 // 37
+data8 0x3FAF2F96C6754AED // 38
+data8 0x3FAFEFCA8D451FD5 // 39
+data8 0x3FB0585283764177 // 40
+data8 0x3FB0B913AAC7D3A6 // 41
+data8 0x3FB11A294F2569F5 // 42
+data8 0x3FB16B51A2696890 // 43
+data8 0x3FB1CD03ADACC8BD // 44
+data8 0x3FB22F0BDD7745F5 // 45
+data8 0x3FB2916ACA38D1E7 // 46
+data8 0x3FB2F4210DF7663C // 47
+data8 0x3FB346A6C3C49065 // 48
+data8 0x3FB3A9FEBC605409 // 49
+data8 0x3FB3FD0C10A3AA54 // 50
+data8 0x3FB46107D3540A81 // 51
+data8 0x3FB4C55DD16967FE // 52
+data8 0x3FB51940330C000A // 53
+data8 0x3FB56D620EE7115E // 54
+data8 0x3FB5D2ABCF26178D // 55
+data8 0x3FB6275AA5DEBF81 // 56
+data8 0x3FB68D4EAF26D7EE // 57
+data8 0x3FB6E28C5C54A28D // 58
+data8 0x3FB7380B9665B7C7 // 59
+data8 0x3FB78DCCC278E85B // 60
+data8 0x3FB7F50C2CF25579 // 61
+data8 0x3FB84B5FD5EAEFD7 // 62
+data8 0x3FB8A1F6BAB2B226 // 63
+data8 0x3FB8F8D144557BDF // 64
+data8 0x3FB94FEFDCD61D92 // 65
+data8 0x3FB9A752EF316149 // 66
+data8 0x3FB9FEFAE7611EDF // 67
+data8 0x3FBA56E8325F5C86 // 68
+data8 0x3FBAAF1B3E297BB3 // 69
+data8 0x3FBB079479C372AC // 70
+data8 0x3FBB6054553B12F7 // 71
+data8 0x3FBBB95B41AB5CE5 // 72
+data8 0x3FBC12A9B13FE079 // 73
+data8 0x3FBC6C4017382BEA // 74
+data8 0x3FBCB41FBA42686C // 75
+data8 0x3FBD0E38CE73393E // 76
+data8 0x3FBD689B2193F132 // 77
+data8 0x3FBDC3472B1D285F // 78
+data8 0x3FBE0C06300D528B // 79
+data8 0x3FBE6738190E394B // 80
+data8 0x3FBEC2B50D208D9A // 81
+data8 0x3FBF0C1C2B936827 // 82
+data8 0x3FBF68216C9CC726 // 83
+data8 0x3FBFB1F6381856F3 // 84
+data8 0x3FC00742AF4CE5F8 // 85
+data8 0x3FC02C64906512D2 // 86
+data8 0x3FC05AF1E63E03B4 // 87
+data8 0x3FC0804BEA723AA8 // 88
+data8 0x3FC0AF1FD6711526 // 89
+data8 0x3FC0D4B2A88059FF // 90
+data8 0x3FC0FA5EF136A06C // 91
+data8 0x3FC1299A4FB3E305 // 92
+data8 0x3FC14F806253C3EC // 93
+data8 0x3FC175805D1587C1 // 94
+data8 0x3FC19B9A637CA294 // 95
+data8 0x3FC1CB5FC26EDE16 // 96
+data8 0x3FC1F1B4E65F2590 // 97
+data8 0x3FC218248B5DC3E5 // 98
+data8 0x3FC23EAED62ADC76 // 99
+data8 0x3FC26553EBD337BC // 100
+data8 0x3FC28C13F1B118FF // 101
+data8 0x3FC2BCAA14381385 // 102
+data8 0x3FC2E3A740B7800E // 103
+data8 0x3FC30ABFD8F333B6 // 104
+data8 0x3FC331F403985096 // 105
+data8 0x3FC35943E7A6068F // 106
+data8 0x3FC380AFAC6E7C07 // 107
+data8 0x3FC3A8377997B9E5 // 108
+data8 0x3FC3CFDB771C9ADB // 109
+data8 0x3FC3EDA90D39A5DE // 110
+data8 0x3FC4157EC09505CC // 111
+data8 0x3FC43D7113FB04C0 // 112
+data8 0x3FC4658030AD1CCE // 113
+data8 0x3FC48DAC404638F5 // 114
+data8 0x3FC4B5F56CBBB869 // 115
+data8 0x3FC4DE5BE05E7582 // 116
+data8 0x3FC4FCBC0776FD85 // 117
+data8 0x3FC525561E9256EE // 118
+data8 0x3FC54E0DF3198865 // 119
+data8 0x3FC56CAB7112BDE2 // 120
+data8 0x3FC59597BA735B15 // 121
+data8 0x3FC5BEA23A506FD9 // 122
+data8 0x3FC5DD7E08DE382E // 123
+data8 0x3FC606BDD3F92355 // 124
+data8 0x3FC6301C518A501E // 125
+data8 0x3FC64F3770618915 // 126
+data8 0x3FC678CC14C1E2D7 // 127
+data8 0x3FC6981005ED2947 // 128
+data8 0x3FC6C1DB5F9BB335 // 129
+data8 0x3FC6E1488ECD2880 // 130
+data8 0x3FC70B4B2E7E41B8 // 131
+data8 0x3FC72AE209146BF8 // 132
+data8 0x3FC7551C81BD8DCF // 133
+data8 0x3FC774DD76CC43BD // 134
+data8 0x3FC79F505DB00E88 // 135
+data8 0x3FC7BF3BDE099F30 // 136
+data8 0x3FC7E9E7CAC437F8 // 137
+data8 0x3FC809FE4902D00D // 138
+data8 0x3FC82A2757995CBD // 139
+data8 0x3FC85525C625E098 // 140
+data8 0x3FC8757A79831887 // 141
+data8 0x3FC895E2058D8E02 // 142
+data8 0x3FC8C13437695531 // 143
+data8 0x3FC8E1C812EF32BE // 144
+data8 0x3FC9026F112197E8 // 145
+data8 0x3FC923294888880A // 146
+data8 0x3FC94EEA4B8334F2 // 147
+data8 0x3FC96FD1B639FC09 // 148
+data8 0x3FC990CCA66229AB // 149
+data8 0x3FC9B1DB33334842 // 150
+data8 0x3FC9D2FD740E6606 // 151
+data8 0x3FC9FF49EEDCB553 // 152
+data8 0x3FCA209A84FBCFF7 // 153
+data8 0x3FCA41FF1E43F02B // 154
+data8 0x3FCA6377D2CE9377 // 155
+data8 0x3FCA8504BAE0D9F5 // 156
+data8 0x3FCAA6A5EEEBEFE2 // 157
+data8 0x3FCAC85B878D7878 // 158
+data8 0x3FCAEA259D8FFA0B // 159
+data8 0x3FCB0C0449EB4B6A // 160
+data8 0x3FCB2DF7A5C50299 // 161
+data8 0x3FCB4FFFCA70E4D1 // 162
+data8 0x3FCB721CD17157E2 // 163
+data8 0x3FCB944ED477D4EC // 164
+data8 0x3FCBB695ED655C7C // 165
+data8 0x3FCBD8F2364AEC0F // 166
+data8 0x3FCBFB63C969F4FF // 167
+data8 0x3FCC1DEAC134D4E9 // 168
+data8 0x3FCC4087384F4F80 // 169
+data8 0x3FCC6339498F09E1 // 170
+data8 0x3FCC86010FFC076B // 171
+data8 0x3FCC9D3D065C5B41 // 172
+data8 0x3FCCC029375BA079 // 173
+data8 0x3FCCE32B66978BA4 // 174
+data8 0x3FCD0643AFD51404 // 175
+data8 0x3FCD29722F0DEA45 // 176
+data8 0x3FCD4CB70070FE43 // 177
+data8 0x3FCD6446AB3F8C95 // 178
+data8 0x3FCD87B0EF71DB44 // 179
+data8 0x3FCDAB31D1FE99A6 // 180
+data8 0x3FCDCEC96FDC888E // 181
+data8 0x3FCDE69088763579 // 182
+data8 0x3FCE0A4E4A25C1FF // 183
+data8 0x3FCE2E2315755E32 // 184
+data8 0x3FCE461322D1648A // 185
+data8 0x3FCE6A0E95C7787B // 186
+data8 0x3FCE8E216243DD60 // 187
+data8 0x3FCEA63AF26E007C // 188
+data8 0x3FCECA74ED15E0B7 // 189
+data8 0x3FCEEEC692CCD259 // 190
+data8 0x3FCF070A36B8D9C0 // 191
+data8 0x3FCF2B8393E34A2D // 192
+data8 0x3FCF5014EF538A5A // 193
+data8 0x3FCF68833AF1B17F // 194
+data8 0x3FCF8D3CD9F3F04E // 195
+data8 0x3FCFA5C61ADD93E9 // 196
+data8 0x3FCFCAA8567EBA79 // 197
+data8 0x3FCFE34CC8743DD8 // 198
+data8 0x3FD0042BFD74F519 // 199
+data8 0x3FD016BDF6A18017 // 200
+data8 0x3FD023262F907322 // 201
+data8 0x3FD035CCED8D32A1 // 202
+data8 0x3FD042430E869FFB // 203
+data8 0x3FD04EBEC842B2DF // 204
+data8 0x3FD06182E84FD4AB // 205
+data8 0x3FD06E0CB609D383 // 206
+data8 0x3FD080E60BEC8F12 // 207
+data8 0x3FD08D7E0D894735 // 208
+data8 0x3FD0A06CC96A2055 // 209
+data8 0x3FD0AD131F3B3C55 // 210
+data8 0x3FD0C01771E775FB // 211
+data8 0x3FD0CCCC3CAD6F4B // 212
+data8 0x3FD0D986D91A34A8 // 213
+data8 0x3FD0ECA9B8861A2D // 214
+data8 0x3FD0F972F87FF3D5 // 215
+data8 0x3FD106421CF0E5F7 // 216
+data8 0x3FD11983EBE28A9C // 217
+data8 0x3FD12661E35B7859 // 218
+data8 0x3FD13345D2779D3B // 219
+data8 0x3FD146A6F597283A // 220
+data8 0x3FD15399E81EA83D // 221
+data8 0x3FD16092E5D3A9A6 // 222
+data8 0x3FD17413C3B7AB5D // 223
+data8 0x3FD1811BF629D6FA // 224
+data8 0x3FD18E2A47B46685 // 225
+data8 0x3FD19B3EBE1A4418 // 226
+data8 0x3FD1AEE9017CB450 // 227
+data8 0x3FD1BC0CED7134E1 // 228
+data8 0x3FD1C93712ABC7FF // 229
+data8 0x3FD1D66777147D3E // 230
+data8 0x3FD1EA3BD1286E1C // 231
+data8 0x3FD1F77BED932C4C // 232
+data8 0x3FD204C25E1B031F // 233
+data8 0x3FD2120F28CE69B1 // 234
+data8 0x3FD21F6253C48D00 // 235
+data8 0x3FD22CBBE51D60A9 // 236
+data8 0x3FD240CE4C975444 // 237
+data8 0x3FD24E37F8ECDAE7 // 238
+data8 0x3FD25BA8215AF7FC // 239
+data8 0x3FD2691ECC29F042 // 240
+data8 0x3FD2769BFFAB2DFF // 241
+data8 0x3FD2841FC23952C9 // 242
+data8 0x3FD291AA1A384978 // 243
+data8 0x3FD29F3B0E15584A // 244
+data8 0x3FD2B3A0EE479DF7 // 245
+data8 0x3FD2C142842C09E5 // 246
+data8 0x3FD2CEEACCB7BD6C // 247
+data8 0x3FD2DC99CE82FF20 // 248
+data8 0x3FD2EA4F902FD7D9 // 249
+data8 0x3FD2F80C186A25FC // 250
+data8 0x3FD305CF6DE7B0F6 // 251
+data8 0x3FD3139997683CE7 // 252
+data8 0x3FD3216A9BB59E7C // 253
+data8 0x3FD32F4281A3CEFE // 254
+data8 0x3FD33D2150110091 // 255
+//
+// Lo parts of ln(1/frcpa(1+i/256))/ln(10), i=0...255
+data4 0x1FB0EB5A // 0
+data4 0x206E5EE3 // 1
+data4 0x208F3609 // 2
+data4 0x2070EB03 // 3
+data4 0x1F314BAE // 4
+data4 0x217A889D // 5
+data4 0x21E63650 // 6
+data4 0x21C2F4A3 // 7
+data4 0x2192A10C // 8
+data4 0x1F84B73E // 9
+data4 0x2243FBCA // 10
+data4 0x21BD9C51 // 11
+data4 0x213C542B // 12
+data4 0x21047386 // 13
+data4 0x21217D8F // 14
+data4 0x226791B7 // 15
+data4 0x204CCE66 // 16
+data4 0x2234CE9F // 17
+data4 0x220675E2 // 18
+data4 0x22B8E5BA // 19
+data4 0x22C12D14 // 20
+data4 0x211D41F0 // 21
+data4 0x228507F3 // 22
+data4 0x22F7274B // 23
+data4 0x22A7FDD1 // 24
+data4 0x2244A06E // 25
+data4 0x215DCE69 // 26
+data4 0x22F5C961 // 27
+data4 0x22EBEF29 // 28
+data4 0x222A2CB6 // 29
+data4 0x22B9FE00 // 30
+data4 0x22E79EB7 // 31
+data4 0x222F9607 // 32
+data4 0x2189D87F // 33
+data4 0x2236DB45 // 34
+data4 0x22ED77FB // 35
+data4 0x21CB70F0 // 36
+data4 0x21B8ACE8 // 37
+data4 0x22EC58C1 // 38
+data4 0x22CFCC1C // 39
+data4 0x2343E77A // 40
+data4 0x237FBC7F // 41
+data4 0x230D472E // 42
+data4 0x234686FB // 43
+data4 0x23770425 // 44
+data4 0x223977EC // 45
+data4 0x2345800A // 46
+data4 0x237BC351 // 47
+data4 0x23191502 // 48
+data4 0x232BAC12 // 49
+data4 0x22692421 // 50
+data4 0x234D409D // 51
+data4 0x22EC3214 // 52
+data4 0x2376C916 // 53
+data4 0x22B00DD1 // 54
+data4 0x2309D910 // 55
+data4 0x22F925FD // 56
+data4 0x22A63A7B // 57
+data4 0x2106264A // 58
+data4 0x234227F9 // 59
+data4 0x1ECB1978 // 60
+data4 0x23460A62 // 61
+data4 0x232ED4B1 // 62
+data4 0x226DDC38 // 63
+data4 0x1F101A73 // 64
+data4 0x21B1F82B // 65
+data4 0x22752F19 // 66
+data4 0x2320BC15 // 67
+data4 0x236EEC5E // 68
+data4 0x23404D3E // 69
+data4 0x2304C517 // 70
+data4 0x22F7441A // 71
+data4 0x230D3D7A // 72
+data4 0x2264A9DF // 73
+data4 0x22410CC8 // 74
+data4 0x2342CCCB // 75
+data4 0x23560BD4 // 76
+data4 0x237BBFFE // 77
+data4 0x2373A206 // 78
+data4 0x22C871B9 // 79
+data4 0x2354B70C // 80
+data4 0x232EDB33 // 81
+data4 0x235DB680 // 82
+data4 0x230EF422 // 83
+data4 0x235316CA // 84
+data4 0x22EEEE8B // 85
+data4 0x2375C88C // 86
+data4 0x235ABD21 // 87
+data4 0x23A0D232 // 88
+data4 0x23F5FFB5 // 89
+data4 0x23D3CEC8 // 90
+data4 0x22A92204 // 91
+data4 0x238C64DF // 92
+data4 0x23B82896 // 93
+data4 0x22D633B8 // 94
+data4 0x23861E93 // 95
+data4 0x23CB594B // 96
+data4 0x2330387E // 97
+data4 0x21CD4702 // 98
+data4 0x2284C505 // 99
+data4 0x23D6995C // 100
+data4 0x23F6C807 // 101
+data4 0x239CEF5C // 102
+data4 0x239442B0 // 103
+data4 0x22B35EE5 // 104
+data4 0x2391E9A4 // 105
+data4 0x23A390F5 // 106
+data4 0x2349AC9C // 107
+data4 0x23FA5535 // 108
+data4 0x21E3A46A // 109
+data4 0x23B44ABA // 110
+data4 0x23CEA8E0 // 111
+data4 0x23F647DC // 112
+data4 0x2390D1A8 // 113
+data4 0x23D0CFA2 // 114
+data4 0x236E0872 // 115
+data4 0x23B88B91 // 116
+data4 0x2283C359 // 117
+data4 0x232F647F // 118
+data4 0x23122CD7 // 119
+data4 0x232CF564 // 120
+data4 0x232630FD // 121
+data4 0x23BEE1C8 // 122
+data4 0x23B2BD30 // 123
+data4 0x2301F1C0 // 124
+data4 0x23CE4D67 // 125
+data4 0x23A353C9 // 126
+data4 0x238086E8 // 127
+data4 0x22D0D29E // 128
+data4 0x23A3B3C8 // 129
+data4 0x23F69F4B // 130
+data4 0x23EA3C21 // 131
+data4 0x23951C88 // 132
+data4 0x2372AFFC // 133
+data4 0x23A6D1A8 // 134
+data4 0x22BBBAF4 // 135
+data4 0x227FA3DD // 136
+data4 0x23804D9B // 137
+data4 0x232D771F // 138
+data4 0x239CB57B // 139
+data4 0x2303CF34 // 140
+data4 0x22218C2A // 141
+data4 0x23991BEE // 142
+data4 0x23EB3596 // 143
+data4 0x230487FA // 144
+data4 0x2135DF4C // 145
+data4 0x2380FD2D // 146
+data4 0x23EB75E9 // 147
+data4 0x211C62C8 // 148
+data4 0x23F518F1 // 149
+data4 0x23FEF882 // 150
+data4 0x239097C7 // 151
+data4 0x223E2BDA // 152
+data4 0x23988F89 // 153
+data4 0x22E4A4AD // 154
+data4 0x23F03D9C // 155
+data4 0x23F5018F // 156
+data4 0x23E1E250 // 157
+data4 0x23FD3D90 // 158
+data4 0x22DEE2FF // 159
+data4 0x238342AB // 160
+data4 0x22E6736F // 161
+data4 0x233AFC28 // 162
+data4 0x2395F661 // 163
+data4 0x23D8B991 // 164
+data4 0x23CD58D5 // 165
+data4 0x21941FD6 // 166
+data4 0x23352915 // 167
+data4 0x235D09EE // 168
+data4 0x22DC7EF9 // 169
+data4 0x238BC9F3 // 170
+data4 0x2397DF8F // 171
+data4 0x2380A7BB // 172
+data4 0x23EFF48C // 173
+data4 0x21E67408 // 174
+data4 0x236420F7 // 175
+data4 0x22C8DFB5 // 176
+data4 0x239B5D35 // 177
+data4 0x23BDC09D // 178
+data4 0x239E822C // 179
+data4 0x23984F0A // 180
+data4 0x23EF2119 // 181
+data4 0x23F738B8 // 182
+data4 0x23B66187 // 183
+data4 0x23B06AD7 // 184
+data4 0x2369140F // 185
+data4 0x218DACE6 // 186
+data4 0x21DF23F1 // 187
+data4 0x235D8B34 // 188
+data4 0x23460333 // 189
+data4 0x23F11D62 // 190
+data4 0x23C37147 // 191
+data4 0x22B2AE2A // 192
+data4 0x23949211 // 193
+data4 0x23B69799 // 194
+data4 0x23DBEC75 // 195
+data4 0x229A6FB3 // 196
+data4 0x23FC6C60 // 197
+data4 0x22D01FFC // 198
+data4 0x235985F0 // 199
+data4 0x23F7ECA5 // 200
+data4 0x23F924D3 // 201
+data4 0x2381B92F // 202
+data4 0x243A0FBE // 203
+data4 0x24712D72 // 204
+data4 0x24594E2F // 205
+data4 0x220CD12A // 206
+data4 0x23D87FB0 // 207
+data4 0x2338288A // 208
+data4 0x242BB2CC // 209
+data4 0x220F6265 // 210
+data4 0x23BB7FE3 // 211
+data4 0x2301C0A2 // 212
+data4 0x246709AB // 213
+data4 0x23A619E2 // 214
+data4 0x24030E3B // 215
+data4 0x233C36CC // 216
+data4 0x241AAB77 // 217
+data4 0x243D41A3 // 218
+data4 0x23834A60 // 219
+data4 0x236AC7BF // 220
+data4 0x23B6D597 // 221
+data4 0x210E9474 // 222
+data4 0x242156E6 // 223
+data4 0x243A1D68 // 224
+data4 0x2472187C // 225
+data4 0x23834E86 // 226
+data4 0x23CA0807 // 227
+data4 0x24745887 // 228
+data4 0x23E2B0E1 // 229
+data4 0x2421EB67 // 230
+data4 0x23DCC64E // 231
+data4 0x22DF71D1 // 232
+data4 0x238D5ECA // 233
+data4 0x23CDE86F // 234
+data4 0x24131F45 // 235
+data4 0x240FE4E2 // 236
+data4 0x2317731A // 237
+data4 0x24015C76 // 238
+data4 0x2301A4E8 // 239
+data4 0x23E52A6D // 240
+data4 0x247D8A0D // 241
+data4 0x23DFEEBA // 242
+data4 0x22139FEC // 243
+data4 0x2454A112 // 244
+data4 0x23C21E28 // 245
+data4 0x2460D813 // 246
+data4 0x24258924 // 247
+data4 0x2425680F // 248
+data4 0x24194D1E // 249
+data4 0x24242C2F // 250
+data4 0x243DDE5E // 251
+data4 0x23DEB388 // 252
+data4 0x23E0E6EB // 253
+data4 0x24393E74 // 254
+data4 0x241B1863 // 255
+LOCAL_OBJECT_END(log10_data)
+
+
+
+// Code
+//==============================================================
+
+// log has p13 true, p14 false
+// log10 has p14 true, p13 false
+
+.section .text
+GLOBAL_IEEE754_ENTRY(log10)
+{ .mfi
+ getf.exp GR_Exp = f8 // if x is unorm then must recompute
+ frcpa.s1 FR_RcpX,p0 = f1,f8 // FR_RcpX = approximation of 1/x
+ mov GR_05 = 0xFFFE // biased exponent of A2=0.5
+}
+{ .mlx
+ addl GR_ad_1 = @ltoff(log10_data),gp // address of the linkage table entry for log10_data
+ movl GR_A3 = 0x3fd5555555555557 // double precision memory
+ // representation of A3 (about 1/3)
+};;
+
+{ .mfi
+ getf.sig GR_Sig = f8 // get significand to calculate index
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ mov GR_xorg = 0x3fefe // double precision memory msb of 255/256
+}
+{ .mib
+ ld8 GR_ad_1 = [GR_ad_1] // GR_ad_1 = address of log10_data
+ cmp.eq p14,p13 = r0,r0 // set p14 to 1 and p13 to 0 for log10
+ br.cond.sptk log_log10_common // continue in the code shared with log
+};;
+GLOBAL_IEEE754_END(log10)
+libm_alias_double_other (__log10, log10)
+
+
+GLOBAL_IEEE754_ENTRY(log)
+{ .mfi
+ getf.exp GR_Exp = f8 // if x is unorm then must recompute
+ frcpa.s1 FR_RcpX,p0 = f1,f8 // FR_RcpX = approximation of 1/x
+ mov GR_05 = 0xfffe // biased exponent of A2=0.5
+}
+{ .mlx
+ addl GR_ad_1 = @ltoff(log_data),gp // address of the linkage table entry for log_data
+ movl GR_A3 = 0x3fd5555555555557 // double precision memory
+ // representation of A3 (about 1/3)
+};;
+
+{ .mfi
+ getf.sig GR_Sig = f8 // get significand to calculate index
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ mov GR_xorg = 0x3fefe // double precision memory msb of 255/256
+}
+{ .mfi
+ ld8 GR_ad_1 = [GR_ad_1] // GR_ad_1 = address of log_data
+ nop.f 0
+ cmp.eq p13,p14 = r0,r0 // set p13 to 1 and p14 to 0 for log, then fall through to log_log10_common
+};;
+
+log_log10_common: // shared by log (p13 set) and log10 (p14 set)
+{ .mfi
+ getf.d GR_x = f8 // double precision memory representation of x
+ fclass.m p9,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
+ dep.z GR_dx = 3, 44, 2 // Create 0x0000300000000000
+ // Difference between double precision
+ // memory representations of 257/256 and
+ // 255/256
+}
+{ .mfi
+ setf.exp FR_A2 = GR_05 // create A2 = 0.5 from its biased exponent
+ fnorm.s1 FR_NormX = f8 // normalized copy of x, used by log_positive_unorms
+ mov GR_bias = 0xffff // exponent bias subtracted from GR_Exp in log_core
+};;
+
+{ .mfi
+ setf.d FR_A3 = GR_A3 // create A3
+ fcmp.eq.s1 p12,p0 = f1,f8 // is x equal to 1.0?
+ dep.z GR_xorg = GR_xorg, 44, 19 // 0x3fefe00000000000
+ // double precision memory
+ // representation of 255/256
+}
+{ .mib
+ add GR_ad_2 = 0x30,GR_ad_1 // address of A5,A4
+ add GR_ad_3 = 0x840,GR_ad_1 // address of ln(1/frcpa) lo parts
+(p8) br.cond.spnt log_positive_unorms // positive unorm: recompute exponent, significand and x
+};;
+
+log_core: // main path: classify x, reduce the range, evaluate the polynomial and combine
+{ .mfi
+ ldfpd FR_A7,FR_A6 = [GR_ad_1],16 // load A7,A6 and advance GR_ad_1 by 16
+ fclass.m p10,p0 = f8,0x3A // is x < 0?
+ sub GR_Nm1 = GR_Exp,GR_05 // unbiased_exponent_of_x + 1 (GR_05 is bias - 1)
+}
+{ .mfi
+ ldfpd FR_A5,FR_A4 = [GR_ad_2],16 // load A5,A4; GR_ad_2 now points at the Thi table
+(p9) fma.d.s0 f8 = f8,f1,f0 // set V-flag
+ sub GR_N = GR_Exp,GR_bias // unbiased_exponent_of_x
+};;
+
+{ .mfi
+ setf.sig FR_N = GR_N // copy unbiased exponent of x to significand
+ fms.s1 FR_r = FR_RcpX,f8,f1 // range reduction for |x-1|>1/256
+ extr.u GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
+}
+{ .mib
+ sub GR_x = GR_x, GR_xorg // get diff between x and 255/256
+ cmp.gtu p6, p7 = 2, GR_Nm1 // p6 true if 0.5 <= x < 2
+(p9) br.ret.spnt b0 // exit for NaN, NaT and +Inf
+};;
+
+{ .mfi
+ ldfpd FR_Ln2hi,FR_Ln2lo = [GR_ad_1],16 // hi and lo parts of the constant multiplied by N below
+ fclass.m p11,p0 = f8,0x07 // is x = 0?
+ shladd GR_ad_3 = GR_Ind,2,GR_ad_3 // address of Tlo
+}
+{ .mib
+ shladd GR_ad_2 = GR_Ind,3,GR_ad_2 // address of Thi
+(p6) cmp.leu p6, p7 = GR_x, GR_dx // 255/256 <= x <= 257/256
+(p10) br.cond.spnt log_negatives // jump if x is negative
+};;
+
+// p6 is true if |x-1| <= 1/256 (255/256 <= x <= 257/256, bounds included)
+// p7 is true if |x-1| > 1/256
+{ .mfi
+ ldfd FR_Thi = [GR_ad_2] // Thi table entry for this index
+(p6) fms.s1 FR_r = f8,f1,f1 // range reduction for |x-1|<=1/256
+ nop.i 0
+};;
+
+{ .mmi
+(p7) ldfs FR_Tlo = [GR_ad_3] // Tlo table entry (single precision) for this index
+ nop.m 0
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p12) fma.d.s0 f8 = f0,f0,f0 // result is +0.0 when x is 1.0
+(p12) br.ret.spnt b0 // exit for +1.0
+};;
+
+.pred.rel "mutex",p6,p7
+{ .mfi
+(p6) mov GR_NearOne = 1
+ fms.s1 FR_A32 = FR_A3,FR_r,FR_A2 // A3*r-A2
+(p7) mov GR_NearOne = 0
+}
+{ .mfb
+ ldfe FR_InvLn10 = [GR_ad_1],16 // factor applied to P(r) in the final fma (presumably 1/ln(10) for log10; table not visible here)
+ fma.s1 FR_r2 = FR_r,FR_r,f0 // r^2
+(p11) br.cond.spnt log_zeroes // jump if x is zero
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_A6 = FR_A7,FR_r,FR_A6 // A7*r+A6
+ nop.i 0
+}
+{ .mfi
+(p7) cmp.eq.unc p9,p0 = r0,r0 // set p9 if |x-1| > 1/256
+ fma.s1 FR_A4 = FR_A5,FR_r,FR_A4 // A5*r+A4
+(p14) cmp.eq.unc p8,p0 = 1,GR_NearOne // set p8 to 1 if it's log10
+ // and argument near 1.0
+};;
+
+{ .mfi
+(p6) getf.exp GR_rexp = FR_r // Get signexp of x-1
+(p7) fcvt.xf FR_N = FR_N // convert the integer N to floating point
+(p8) cmp.eq p9,p6 = r0,r0 // Also set p9 and clear p6 if log10
+ // and arg near 1
+};;
+
+{ .mfi
+ nop.m 0
+ fma.s1 FR_r4 = FR_r2,FR_r2,f0 // r^4
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p8) fma.s1 FR_NxLn2pT = f0,f0,f0 // Clear NxLn2pT if log10 near 1
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+ // (A3*r-A2)*r^2+r
+ fma.s1 FR_A321 = FR_A32,FR_r2,FR_r
+ mov GR_mask = 0x1ffff // 17-bit exponent mask, drops the sign bit of signexp
+}
+{ .mfi
+ nop.m 0
+ // (A7*r+A6)*r^2+(A5*r+A4)
+ fma.s1 FR_A4 = FR_A6,FR_r2,FR_A4
+ nop.i 0
+};;
+
+{ .mfi
+(p6) and GR_rexp = GR_rexp, GR_mask // biased exponent of x-1 without the sign
+ // N*Ln2hi+Thi
+(p7) fma.s1 FR_NxLn2hipThi = FR_N,FR_Ln2hi,FR_Thi
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // N*Ln2lo+Tlo
+(p7) fma.s1 FR_NxLn2lopTlo = FR_N,FR_Ln2lo,FR_Tlo
+ nop.i 0
+};;
+
+{ .mfi
+(p6) sub GR_rexp = GR_rexp, GR_bias // unbiased exponent of x-1
+(p9) fma.s1 f8 = FR_A4,FR_r4,FR_A321 // P(r) if |x-1| > 1/256 or
+ // log10 and |x-1| <= 1/256
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // (N*Ln2hi+Thi) + (N*Ln2lo+Tlo)
+(p7) fma.s1 FR_NxLn2pT = FR_NxLn2hipThi,f1,FR_NxLn2lopTlo
+ nop.i 0
+};;
+
+{ .mfi
+(p6) cmp.gt.unc p10, p6 = -40, GR_rexp // Test |x-1| < 2^-40
+ nop.f 0
+ nop.i 0
+};;
+
+{ .mfi
+ nop.m 0
+(p10) fma.d.s0 f8 = FR_A32,FR_r2,FR_r // log(x) if |x-1| < 2^-40
+ nop.i 0
+};;
+
+.pred.rel "mutex",p6,p9
+{ .mfi
+ nop.m 0
+(p6) fma.d.s0 f8 = FR_A4,FR_r4,FR_A321 // log(x) if 2^-40 <= |x-1| <= 1/256
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p9) fma.d.s0 f8 = f8,FR_InvLn10,FR_NxLn2pT // result if |x-1| > 1/256
+ // or log10 and |x-1| <= 1/256
+ br.ret.sptk b0 // return to the caller with the result in f8
+};;
+
+.align 32
+log_positive_unorms: // positive unorm x: redo the integer views of x from the normalized copy
+{ .mmf
+ getf.exp GR_Exp = FR_NormX // recompute biased exponent
+ getf.d GR_x = FR_NormX // recompute double precision x
+ fcmp.eq.s1 p12,p0 = f1,FR_NormX // is x equal to 1.0?
+};;
+
+{ .mfb
+ getf.sig GR_Sig = FR_NormX // recompute significand
+ fcmp.eq.s0 p15, p0 = f8, f0 // set denormal flag
+ br.cond.sptk log_core // rejoin the main path with the recomputed values
+};;
+
+.align 32
+log_zeroes: // x is +0 or -0: result is -INF, then report through the libm error path
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fms.s1 FR_tmp = f0,f0,f1 // -1.0
+ nop.i 0
+};;
+
+.pred.rel "mutex",p13,p14
+{ .mfi
+(p13) mov GR_TAG = 2 // set libm error in case of log
+ frcpa.s0 f8,p0 = FR_tmp,f0 // log(+/-0) should be equal to -INF.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of FR_tmp/f0.
+ // Since FR_tmp is -1.0 the quotient is -INF
+ nop.i 0
+}
+{ .mib
+(p14) mov GR_TAG = 8 // set libm error in case of log10
+ nop.i 0
+ br.cond.sptk log_libm_err // hand over to the common error reporting code
+};;
+
+.align 32
+log_negatives: // x < 0: result is NaN, then report through the libm error path
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for __libm_error_support#
+ nop.i 0
+};;
+
+.pred.rel "mutex",p13,p14
+{ .mfi
+(p13) mov GR_TAG = 3 // set libm error in case of log
+ frcpa.s0 f8,p0 = f0,f0 // log(negatives) should be equal to NaN.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f0/f0 i.e. NaN.
+(p14) mov GR_TAG = 9 // set libm error in case of log10; execution falls through to log_libm_err
+};;
+
+.align 32
+log_libm_err: // common tail of log_zeroes and log_negatives; runs on into __libm_error_region
+{ .mmi
+ alloc r32 = ar.pfs,1,4,4,0 // register frame: 1 input, 4 locals, 4 outputs, 0 rotating
+ mov GR_Parameter_TAG = GR_TAG // pass the error tag chosen above
+ nop.i 0
+};;
+GLOBAL_IEEE754_END(log)
+libm_alias_double_other (__log, log)
+#ifdef SHARED
+.symver log,log@@GLIBC_2.29
+.weak __log_compat
+.set __log_compat,__log
+.symver __log_compat,log@GLIBC_2.2
+#endif
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y = -32,sp // Parameter 2 address (sp+32 once the frame is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS = ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp = -64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP = gp // Save gp
+};;
+
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack; pointer advances to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0 = b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // result slot address (sp+48), recomputed after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_log10.c b/sysdeps/ia64/fpu/e_log10.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log10.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log10f.c b/sysdeps/ia64/fpu/e_log10f.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log10f.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log10l.c b/sysdeps/ia64/fpu/e_log10l.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log10l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log2.S b/sysdeps/ia64/fpu/e_log2.S
new file mode 100644
index 0000000000..e855d3101c
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log2.S
@@ -0,0 +1,715 @@
+.file "log2.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//=================================================================
+// 09/11/00 Initial version
+// 03/19/01 Added one polynomial coefficient, to improve accuracy
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 04/18/03 Reformatted T[255]
+//
+// API
+//=================================================================
+// double log2(double)
+//
+// Overview of operation
+//=================================================================
+// Background
+//
+// Implementation
+//
+// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52
+// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8 (table index)
+// j=0 if f<128; j=1 if f>=128
+// T is a table that stores log2(1/y) (in entries 1..255) rounded to
+// double extended precision; f is used as an index; T[255]=0
+//
+// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
+// and 0 is used instead of T[0]
+// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
+// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
+// for m=2(1-r'), 0<=r'<2^{-9})
+//
+// log2(x) is approximated as
+// (l-j) + T[f] + (c1*r+c2*r^2+...+c7*r^7), if f>0
+//
+
+
+// Special values
+//=================================================================
+// log2(0)=-inf, raises Divide by Zero
+// log2(+inf)=inf
+// log2(x)=NaN, raises Invalid if x<0
+//
+
+
+// Registers used
+//==============================================================
+// f6-f15, f32-f33
+// r2-r3, r23-r30
+// p6,p7,p8,p12
+//
+
+
+GR_SAVE_B0 = r33 // r33-r36 are the 4 local registers of the alloc in log2 (1 input, 4 locals, 4 outputs)
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35 // This reg. can safely be used
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37 // r37-r40 are the 4 output registers of the alloc in log2
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f1 // f1 is the architectural constant 1.0 register
+FR_RESULT = f8 // f8 is also the register holding the argument x of log2
+
+
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+
+data8 0xbfd0000000000000, 0x3fc999999999999a //C_4=-1/4, C_5=1/5 (doubles)
+data8 0xbfc5555555555555, 0x3fc2492492492492 //C_6=-1/6, C_7=1/7 (doubles)
+data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1=1/ln(2)=log2(e), as significand then sign/exponent
+data8 0xaaaaaaaaaaaaaaab, 0x00003ffd // C_3=1/3, as significand then sign/exponent
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+data8 0xb8d8752172fed131, 0x00003ff6
+data8 0x8ae7f475764180a3, 0x00003ff8
+data8 0xe7f73862e72ee35d, 0x00003ff8
+data8 0xa2b25310c941a2f2, 0x00003ff9
+data8 0xcbb91d671abb2e85, 0x00003ff9
+data8 0xfac91e34daa50483, 0x00003ff9
+data8 0x9504a5042eb495c5, 0x00003ffa
+data8 0xa9c4a0bbb580ee02, 0x00003ffa
+data8 0xc19264dc8a5e3bf9, 0x00003ffa
+data8 0xd67aa6703ebf4a77, 0x00003ffa
+data8 0xee76cac6d6e08ce7, 0x00003ffa
+data8 0x81c3f7de5434ed04, 0x00003ffb
+data8 0x8c563033a3ce01e4, 0x00003ffb
+data8 0x9876e9f09a98661c, 0x00003ffb
+data8 0xa31e0ac9b2326ce2, 0x00003ffb
+data8 0xadcf09e1fd10e4a5, 0x00003ffb
+data8 0xb889f992cf03cdb6, 0x00003ffb
+data8 0xc34eec68d901a714, 0x00003ffb
+data8 0xce1df524e9909ed9, 0x00003ffb
+data8 0xd8f726bcb0b80ad0, 0x00003ffb
+data8 0xe3da945b878e27d1, 0x00003ffb
+data8 0xeec851633b76a320, 0x00003ffb
+data8 0xf82ea4bb6101421a, 0x00003ffb
+data8 0x8197ddd7736b2864, 0x00003ffc
+data8 0x871dad4f994253f0, 0x00003ffc
+data8 0x8ca8cae3e892d549, 0x00003ffc
+data8 0x916d6e1559a4b697, 0x00003ffc
+data8 0x97028118efabeb7d, 0x00003ffc
+data8 0x9bcfbce1592ad5d5, 0x00003ffc
+data8 0xa16ee95d0da54a91, 0x00003ffc
+data8 0xa644dcf3403fa5d0, 0x00003ffc
+data8 0xab1ee14ffd659064, 0x00003ffc
+data8 0xb0cd12faebcc6757, 0x00003ffc
+data8 0xb5affdf9b3b221e0, 0x00003ffc
+data8 0xba970fb307c6ade1, 0x00003ffc
+data8 0xbf824f3a9f3e7561, 0x00003ffc
+data8 0xc544c055fde99333, 0x00003ffc
+data8 0xca39266532bdf26c, 0x00003ffc
+data8 0xcf31d124b8fa2f56, 0x00003ffc
+data8 0xd42ec7f59017b6ab, 0x00003ffc
+data8 0xd930124bea9a2c67, 0x00003ffc
+data8 0xde35b7af70e4dab3, 0x00003ffc
+data8 0xe33fbfbb8533ef03, 0x00003ffc
+data8 0xe77625911a7dcef3, 0x00003ffc
+data8 0xec884bd689cc12e3, 0x00003ffc
+data8 0xf19eeabf9e99a40a, 0x00003ffc
+data8 0xf6ba0a35e3d88051, 0x00003ffc
+data8 0xfbd9b237f7b4192b, 0x00003ffc
+data8 0x80111d4a1ee0c79e, 0x00003ffd
+data8 0x82a523a5f875bbfc, 0x00003ffd
+data8 0x84ccecdc92cd0815, 0x00003ffd
+data8 0x87653369d92c057a, 0x00003ffd
+data8 0x89ffd1742da3aa21, 0x00003ffd
+data8 0x8c2d2227d053d9b6, 0x00003ffd
+data8 0x8e5c189793f7f798, 0x00003ffd
+data8 0x90fd0a20e72f3c96, 0x00003ffd
+data8 0x932fa937301e59ae, 0x00003ffd
+data8 0x95d5061a5f0f5f7f, 0x00003ffd
+data8 0x980b5a2ef10e7023, 0x00003ffd
+data8 0x9a4361c5514d3c27, 0x00003ffd
+data8 0x9c7d1f7d541313fd, 0x00003ffd
+data8 0x9f2b16040b500d04, 0x00003ffd
+data8 0xa168a0fa9db22c98, 0x00003ffd
+data8 0xa3a7eaa1f9116293, 0x00003ffd
+data8 0xa5e8f5b4072a3d44, 0x00003ffd
+data8 0xa82bc4f11a5e88aa, 0x00003ffd
+data8 0xaa705b2001db8317, 0x00003ffd
+data8 0xacb6bb0e1e0f8005, 0x00003ffd
+data8 0xaefee78f75707221, 0x00003ffd
+data8 0xb148e37ec994dd99, 0x00003ffd
+data8 0xb394b1bdaca0bc17, 0x00003ffd
+data8 0xb5e255349707e496, 0x00003ffd
+data8 0xb831d0d2fda791cc, 0x00003ffd
+data8 0xba83278f6838ab20, 0x00003ffd
+data8 0xbcd65c67881c7d47, 0x00003ffd
+data8 0xbeb3e0f21d72dc92, 0x00003ffd
+data8 0xc10a7a03457d35dc, 0x00003ffd
+data8 0xc362f9b6f51eddd3, 0x00003ffd
+data8 0xc5bd6326ebfce656, 0x00003ffd
+data8 0xc7a0b3d0637c8f97, 0x00003ffd
+data8 0xc9fe96af0df8e4b5, 0x00003ffd
+data8 0xcc5e6c214b4a2cd7, 0x00003ffd
+data8 0xce46199f374d29cf, 0x00003ffd
+data8 0xd0a978a14c0d9ebe, 0x00003ffd
+data8 0xd293fecafec7f9b5, 0x00003ffd
+data8 0xd4faf1f6f5cf32e6, 0x00003ffd
+data8 0xd6e8595abaad34d1, 0x00003ffd
+data8 0xd952eb7a8ffc1593, 0x00003ffd
+data8 0xdb433ccd805f171e, 0x00003ffd
+data8 0xddb178dc43e6bd84, 0x00003ffd
+data8 0xdfa4bcfb333342a4, 0x00003ffd
+data8 0xe19953741ccea015, 0x00003ffd
+data8 0xe40cee16a2ff21c5, 0x00003ffd
+data8 0xe6048470cdbde8ea, 0x00003ffd
+data8 0xe7fd7308d6895b14, 0x00003ffd
+data8 0xe9f7bbb6a1ff9f87, 0x00003ffd
+data8 0xec7280138809433d, 0x00003ffd
+data8 0xee6fda4365cd051f, 0x00003ffd
+data8 0xf06e94a122ff1f12, 0x00003ffd
+data8 0xf26eb1151441fce5, 0x00003ffd
+data8 0xf470318b88a77e2f, 0x00003ffd
+data8 0xf67317f4d4c8aa58, 0x00003ffd
+data8 0xf8f8b250a9c4cde6, 0x00003ffd
+data8 0xfafec54831f1a484, 0x00003ffd
+data8 0xfd06449bf3eaea1e, 0x00003ffd
+data8 0xff0f324ddb19ab67, 0x00003ffd
+data8 0x808cc8320a9acf15, 0x00003ffe
+data8 0x8192b0748f2cef06, 0x00003ffe
+data8 0x829952f5e6a24ee5, 0x00003ffe
+data8 0x83a0b0bfafe1424e, 0x00003ffe
+data8 0x8466b29f9c41caea, 0x00003ffe
+data8 0x856f5aae0881d857, 0x00003ffe
+data8 0x8678c0eae8ee8190, 0x00003ffe
+data8 0x8782e6685676b9d7, 0x00003ffe
+data8 0x888dcc3abc4554ec, 0x00003ffe
+data8 0x89997378de7b98b8, 0x00003ffe
+data8 0x8aa5dd3be1044279, 0x00003ffe
+data8 0x8b6facdfd0360ab8, 0x00003ffe
+data8 0x8c7d6db7169e0cdb, 0x00003ffe
+data8 0x8d8bf424d6e130b2, 0x00003ffe
+data8 0x8e575b506f409fa6, 0x00003ffe
+data8 0x8f673e418776492c, 0x00003ffe
+data8 0x9077e9ed700ef9ba, 0x00003ffe
+data8 0x9144ef1baec80b20, 0x00003ffe
+data8 0x9256fcdb537f035f, 0x00003ffe
+data8 0x9369d68d75e7e1d6, 0x00003ffe
+data8 0x943880613b8f9f1e, 0x00003ffe
+data8 0x954cc1d9e0d94206, 0x00003ffe
+data8 0xd3c70a37bdf7a294, 0x0000bffd
+data8 0xd19bb053fb0284ec, 0x0000bffd
+data8 0xcffa1a3b7dafb8bf, 0x0000bffd
+data8 0xcdcbe1e2776479ee, 0x0000bffd
+data8 0xcc282218b8bfdda2, 0x0000bffd
+data8 0xc9f703a9afcb38ac, 0x0000bffd
+data8 0xc851146ab89593c6, 0x0000bffd
+data8 0xc61d08265927a860, 0x0000bffd
+data8 0xc474e39705912d26, 0x0000bffd
+data8 0xc23de19ec30c6e3e, 0x0000bffd
+data8 0xc09381cc45db45b4, 0x0000bffd
+data8 0xbee82b4e025ff90c, 0x0000bffd
+data8 0xbcace101149788ec, 0x0000bffd
+data8 0xbaff46962ea47964, 0x0000bffd
+data8 0xb950b1be5e0c14a2, 0x0000bffd
+data8 0xb7110e6ce866f2bc, 0x0000bffd
+data8 0xb5602ccc2a81db52, 0x0000bffd
+data8 0xb3ae4ce740fc8ef1, 0x0000bffd
+data8 0xb1fb6d92c8240ccc, 0x0000bffd
+data8 0xafb609c09b244abc, 0x0000bffd
+data8 0xae00d1cfdeb43cfd, 0x0000bffd
+data8 0xac4a967a8c8c9bd0, 0x0000bffd
+data8 0xaa93568c249e6c52, 0x0000bffd
+data8 0xa8db10cdff375343, 0x0000bffd
+data8 0xa68e6fc5a42376e3, 0x0000bffd
+data8 0xa4d3c25e68dc57f2, 0x0000bffd
+data8 0xa3180b0c192a3816, 0x0000bffd
+data8 0xa15b488e7aa329a0, 0x0000bffd
+data8 0x9f9d79a30f0e1d5f, 0x0000bffd
+data8 0x9dde9d050ee7d4ac, 0x0000bffd
+data8 0x9c1eb16d63d7356c, 0x0000bffd
+data8 0x9a5db592a310c36a, 0x0000bffd
+data8 0x989ba82907a9016f, 0x0000bffd
+data8 0x96d887e26cd57b79, 0x0000bffd
+data8 0x9514536e481c3a4f, 0x0000bffd
+data8 0x934f0979a3715fc9, 0x0000bffd
+data8 0x9188a8af1742a9d5, 0x0000bffd
+data8 0x8fc12fb6c470995f, 0x0000bffd
+data8 0x8df89d364e34f8f1, 0x0000bffd
+data8 0x8c2eefd0d3f67dd6, 0x0000bffd
+data8 0x8a642626eb093d54, 0x0000bffd
+data8 0x88983ed6985bae58, 0x0000bffd
+data8 0x86cb387b4a0feec6, 0x0000bffd
+data8 0x84fd11add101024b, 0x0000bffd
+data8 0x83c856dd81804b78, 0x0000bffd
+data8 0x81f84c2c62afd6f1, 0x0000bffd
+data8 0x80271d3e4be5ea5a, 0x0000bffd
+data8 0xfca991447e7b485d, 0x0000bffc
+data8 0xf90299c904793a3c, 0x0000bffc
+data8 0xf559511d2dc1ed69, 0x0000bffc
+data8 0xf2e72afee9bd2aee, 0x0000bffc
+data8 0xef39ff1d8a40770e, 0x0000bffc
+data8 0xeb8a7a2311c935dc, 0x0000bffc
+data8 0xe7d8990dc620012f, 0x0000bffc
+data8 0xe560b1e3b86e44b6, 0x0000bffc
+data8 0xe1aadb38caee80c4, 0x0000bffc
+data8 0xddf2a051f81b76a4, 0x0000bffc
+data8 0xdb7678bafcaf4b5f, 0x0000bffc
+data8 0xd7ba3a8f0df19bfc, 0x0000bffc
+data8 0xd3fb8fdbdd5cebdb, 0x0000bffc
+data8 0xd17b191905c35652, 0x0000bffc
+data8 0xcdb85d29cefd7121, 0x0000bffc
+data8 0xc9f32c3c88221ef6, 0x0000bffc
+data8 0xc76e5741a95b5dae, 0x0000bffc
+data8 0xc3a506d80d38c718, 0x0000bffc
+data8 0xbfd938ccef8b68c1, 0x0000bffc
+data8 0xbd4ff63e82eef78c, 0x0000bffc
+data8 0xb97ffa2b563865bd, 0x0000bffc
+data8 0xb6f3eb3011eddcea, 0x0000bffc
+data8 0xb31fb7d64898b3e6, 0x0000bffc
+data8 0xb090d63a409e7880, 0x0000bffc
+data8 0xacb8623c7ffa4f39, 0x0000bffc
+data8 0xa8dd5c83d2e45246, 0x0000bffc
+data8 0xa649e998a8d91f2e, 0x0000bffc
+data8 0xa26a93fed6faa94f, 0x0000bffc
+data8 0x9fd43df079d0db1f, 0x0000bffc
+data8 0x9d3cbe69aecac4c2, 0x0000bffc
+data8 0x99574f13c570d0fb, 0x0000bffc
+data8 0x96bce349bf7ee6c7, 0x0000bffc
+data8 0x92d30c9b86cee18e, 0x0000bffc
+data8 0x9035adef17c5bd5c, 0x0000bffc
+data8 0x8c4765e8e8b5f251, 0x0000bffc
+data8 0x89a70da448316ffa, 0x0000bffc
+data8 0x85b44a24474af78a, 0x0000bffc
+data8 0x8310f17aab5adf70, 0x0000bffc
+data8 0x806c6388d0965f29, 0x0000bffc
+data8 0xf8e69092bf0c5ead, 0x0000bffb
+data8 0xf397608bfd2d90e6, 0x0000bffb
+data8 0xee45be24d0eedbc4, 0x0000bffb
+data8 0xe646af233db881e9, 0x0000bffb
+data8 0xe0eee4e1ce3d06fb, 0x0000bffb
+data8 0xdb94a049e6e87a4f, 0x0000bffb
+data8 0xd3888ef9a4249f5a, 0x0000bffb
+data8 0xce280e6fbac39194, 0x0000bffb
+data8 0xc8c50b72319ad574, 0x0000bffb
+data8 0xc0abcd39f41e329b, 0x0000bffb
+data8 0xbb4279cfa7f9667b, 0x0000bffb
+data8 0xb5d69bac77ec398a, 0x0000bffb
+data8 0xb068306bf20d6233, 0x0000bffb
+data8 0xa83dc1b019ddb6a8, 0x0000bffb
+data8 0xa2c8eb1886c2d024, 0x0000bffb
+data8 0x9d517ee93f8e16c0, 0x0000bffb
+data8 0x97d77aae659b92fb, 0x0000bffb
+data8 0x8f9b91da5736d415, 0x0000bffb
+data8 0x8a1b06b09b7fd1d1, 0x0000bffb
+data8 0x8497daca0a2e077a, 0x0000bffb
+data8 0xfe241745a453f10c, 0x0000bffa
+data8 0xf3132d6708d723c5, 0x0000bffa
+data8 0xe7fcf2e21a0e7d77, 0x0000bffa
+data8 0xd75198b04afb8da9, 0x0000bffa
+data8 0xcc2dfe1a4a8ca305, 0x0000bffa
+data8 0xc10500d63aa65882, 0x0000bffa
+data8 0xb5d69bac77ec398a, 0x0000bffa
+data8 0xaaa2c95dc66abcde, 0x0000bffa
+data8 0x9f6984a342d13101, 0x0000bffa
+data8 0x942ac82e5387ac51, 0x0000bffa
+data8 0x88e68ea899a0976c, 0x0000bffa
+data8 0xefebc4409ccf872e, 0x0000bff9
+data8 0xd947b0c6642ef69e, 0x0000bff9
+data8 0xc2987d51e043d407, 0x0000bff9
+data8 0xabde1eeee6bfd257, 0x0000bff9
+data8 0x95188a9917cf2e01, 0x0000bff9
+data8 0xfc8f6a777c1b7f1e, 0x0000bff8
+data8 0xced727635c59725c, 0x0000bff8
+data8 0xa108358a4c904615, 0x0000bff8
+data8 0xe644fcbeb3ac9c90, 0x0000bff7
+data8 0x8a4bd667bf08e7de, 0x0000bff7
+data8 0x0000000000000000 // T[255] Low
+data8 0x0000000000000000 // T[255] High
+LOCAL_OBJECT_END(T_table)
+
+
+
+.section .text
+WEAK_LIBM_ENTRY(log2)
+// double log2(double): the argument arrives in f8 and the result is returned in f8.
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // y=frcpa(x)
+ frcpa.s1 f6,p0=f1,f8
+ // will form significand of 1.5 (to test whether the index is 128 or above)
+ mov r24=0xc
+}
+{.mfi
+ nop.m 0
+ // normalize x
+ fma.s1 f7=f8,f1,f0
+ // r2 = address of the linkage-table entry for poly_coeffs (coefficients, then T_table)
+ addl r2 = @ltoff(poly_coeffs), gp;;
+}
+{.mfi
+ // get significand
+ getf.sig r25=f8
+ // f8 denormal ?
+ fclass.m p8,p10=f8,0x9
+ // will form significand of 1.5 (to test whether the index is 128 or above)
+ shl r24=r24,60
+}
+{.mfi
+ mov r26=0x804
+ nop.f 0
+ // r23=bias-1
+ mov r23=0xfffe;;
+}
+
+{.mmf
+ getf.exp r29=f8
+ // load start address of poly_coeffs (coefficients followed by T_table)
+ ld8 r2=[r2]
+ // will continue only for positive normal/denormal numbers
+ fclass.nm.unc p12,p7 = f8, 0x19 ;;
+}
+
+.pred.rel "mutex",p8,p10
+{.mfi
+ // denormal input, repeat get significand (after normalization)
+ (p8) getf.sig r25=f7
+ // x=1 ?
+ fcmp.eq.s0 p6,p0=f8,f1
+ // get T_index
+ (p10) shr.u r28=r25,63-8
+}
+{.mfi
+ // f32=0.5
+ setf.exp f32=r23
+ nop.f 0
+ // r27=bias
+ mov r27=0xffff;;
+}
+
+{.mmi
+ // denormal input, repeat get exponent (after normalization)
+ (p8) getf.exp r29=f7
+ mov r23=0xff
+ // r26=0x80400...0 (threshold for using polynomial approximation)
+ shl r26=r26,64-12;;
+}
+
+{.mfb
+ add r3=48,r2
+ // r=x*y-1 (fms computes f6*f8-f1)
+ fms.s1 f6=f6,f8,f1
+ (p12) br.cond.spnt SPECIAL_LOG2
+}
+{.mfi
+ // load C_4, C_5
+ ldfpd f10,f11=[r2],16
+ nop.f 0
+ cmp.geu p12,p0=r25,r24;;
+}
+// p12 (just set by cmp.geu) is true when significand >= 1.5, i.e. T_index >= 128
+{.mmi
+ // load C_6, C_7
+ ldfpd f12,f13=[r2],16
+ // r27=bias-1 (if index >=128, will add exponent+1)
+ (p12) mov r27=0xfffe
+ (p8) shr.u r28=r25,63-8;;
+}
+
+// fmerge.se below forms f7 = m: the significand of x with the sign and exponent of 1.0
+{.mfi
+ // load C_1
+ ldfe f14=[r2],32
+ fmerge.se f7=f1,f7
+ // if first 9 bits after leading 1 are all zero, then p8=1
+ cmp.ltu p8,p12=r25,r26
+}
+{.mfi
+ // load C_3
+ ldfe f15=[r3]
+ nop.f 0
+ // get T_index
+ and r28=r28,r23;;
+}
+{.mfi
+ // r29=exponent-bias
+ sub r29=r29,r27
+ // x=1, return 0
+ (p6) fma.d.s0 f8=f0,f0,f0
+ // get T address (r2 + 16*T_index)
+ shladd r2=r28,4,r2
+}
+{.mfb
+ // first 8 bits after leading 1 are all ones ?
+ cmp.eq p10,p0=r23,r28
+ // if first 9 bits after leading 1 are 0 (p8), use polynomial approx. only with r=m-1
+ (p8) fms.s1 f6=f7,f1,f1
+ // x=1, return
+ (p6) br.ret.spnt b0;;
+}
+{.mfi
+ // r26=1
+ mov r26=1
+ // if first 8 bits after leading 1 are all ones, use polynomial approx. only with r=m/2-1
+ (p10) fms.s1 f6=f7,f32,f1
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p12
+{.mmf
+ // load T (unless first 9 bits after leading 1 are 0)
+ (p12) ldfe f33=[r2]
+ // f8=expon - bias
+ setf.sig f8=r29
+ // set T=0 (if first 9 bits after leading 1 are 0)
+ (p8) fma.s1 f33=f0,f0,f0;;
+}
+
+{.mfi
+ nop.m 0
+ // P12=1-0.5*r
+ fnma.s1 f32=f32,f6,f1
+ // r26=2^{63}
+ shl r26=r26,63
+}
+{.mfi
+ nop.m 0
+ // r2=r*r
+ fma.s1 f7=f6,f6,f0
+ nop.i 0;;
+}
+{.mfi
+ // significand(x)=1 ?
+ cmp.eq p0,p6=r26,r25
+ // P67=C_6+C_7*r
+ fma.s1 f13=f13,f6,f12
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P45=C_4+C_5*r
+ fma.s1 f10=f11,f6,f10
+ nop.i 0;;
+}
+// p6 (from the compare above) is true when the significand is not exactly 1; the polynomial is evaluated only then
+{.mfi
+ nop.m 0
+ // C_1*r
+ (p6) fma.s1 f14=f14,f6,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // normalize additive term (l=exponent of x)
+ fcvt.xf f8=f8
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P13=1-0.5*r+C_3*r^2
+ (p6) fma.s1 f15=f15,f7,f32
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P47=P45+r2*P67
+ (p6) fma.s1 f13=f13,f7,f10
+ // if significand(x)=1, return exponent (l)
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r3=r^3
+ (p6) fma.s1 f7=f7,f6,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // add T+l
+ (p6) fma.s1 f8=f8,f1,f33
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P17=P13+r3*P47
+ (p6) fma.s1 f13=f13,f7,f15
+ nop.i 0;;
+}
+
+{.mfb
+ nop.m 0
+ // result=T+l+(C_1*r)*P17
+ (p6) fma.d.s0 f8=f13,f14,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+SPECIAL_LOG2:
+{.mfi
+ nop.m 0
+ // Inputs that are not positive normal/denormal arrive here. x=+Infinity ?
+ fclass.m p7,p0=f8,0x21
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // x=+/-Zero ?
+ fclass.m p8,p0=f8,0x7
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // x=-Infinity, -normal, -denormal ?
+ fclass.m p6,p0=f8,0x3a
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ // log2(+Infinity)=+Infinity (f8 is returned unchanged)
+ nop.f 0
+ (p7) br.ret.spnt b0;;
+}
+{.mfi
+ (p8) mov GR_Parameter_TAG = 170
+ // log2(+/-0)=-infinity, raises Divide by Zero
+ // set f8=-0 (so that the frcpa below computes 1/(-0))
+ (p8) fmerge.ns f8=f0,f8
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ (p8) frcpa.s0 f8,p0=f1,f8
+ (p8) br.cond.sptk __libm_error_region;;
+}
+{.mfb
+ (p6) mov GR_Parameter_TAG = 171
+ // x<0: return NaN, raise Invalid (frcpa of 0/0)
+ (p6) frcpa.s0 f8,p0=f0,f0
+ (p6) br.cond.sptk __libm_error_region;;
+}
+// NOTE(review): nothing on this path writes FR_X by name before __libm_error_region stores it as parameter 1; confirm the register it aliases holds x.
+
+{.mfb
+ nop.m 0
+ // Remaining cases: NaNs
+ fma.d.s0 f8=f8,f1,f0
+ br.ret.sptk b0;;
+}
+
+WEAK_LIBM_END(log2)
+libm_alias_double_other (__log2, log2)
+#ifdef SHARED
+.symver log2,log2@@GLIBC_2.29
+.weak __log2_compat
+.set __log2_compat,__log2
+.symver __log2_compat,log2@GLIBC_2.2
+#endif
+
+// Error path for the zero and negative cases: stores FR_X, FR_Y and FR_RESULT in a 64-byte frame, calls __libm_error_support, and returns the value reloaded from the result slot.
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address = old sp - 32 (new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack; pointer advances to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 (the result) on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // address of the result slot (sp + 48)
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Reload the result slot after the call
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_log2_data.c b/sysdeps/ia64/fpu/e_log2_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log2_data.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log2f.S b/sysdeps/ia64/fpu/e_log2f.S
new file mode 100644
index 0000000000..2372613f01
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log2f.S
@@ -0,0 +1,553 @@
+.file "log2f.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 09/11/00 Initial version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// float log2f(float)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b23
+// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8 (table index)
+// j=0 if f<128; j=1 if f>=128
+// T is a table that stores log2(1/y)-j (in entries 1..255) rounded to
+// double precision; f is used as an index; T[255]=0
+//
+// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b23 = m-1 (fractional part of m),
+// and 0 is used instead of T[0]
+// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
+// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
+// for m=2(1-r'), 0<=r'<2^{-9})
+//
+// log2f(x) is approximated as
+// (l+j) + T[f] + (c1*r+c2*r^2+c3*r^3+c4*r^4), if f>0
+//
+
+
+// Special values
+//==============================================================
+// log2f(0)=-inf, raises Divide by Zero
+// log2f(+inf)=inf
+// log2f(x)=NaN, raises Invalid if x<0
+//
+
+
+// Registers used
+//==============================================================
+// f6-f14
+// r2-r3, r23-r30
+// p6,p7,p8,p10,p12
+//
+
+
+GR_SAVE_B0 = r33 // local register: b0 saved across the error-handler call
+GR_SAVE_PFS = r34 // local register: ar.pfs saved across the error-handler call
+GR_SAVE_GP = r35 // local register: gp saved across the error-handler call
+GR_SAVE_SP = r36 // not referenced anywhere else in this file
+// r37-r40 are the four output registers allocated by alloc r32=ar.pfs,1,4,4,0
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+// Floating-point registers stored to the stack by __libm_error_region
+FR_X = f10 // stored as parameter 1
+FR_Y = f1 // stored as parameter 2 (f1 always reads as 1.0)
+FR_RESULT = f8 // stored as parameter 3
+
+
+
+
+// Data tables
+//==============================================================
+
+RODATA
+// Polynomial coefficients for log2f; the code reaches T_table by advancing r2 by 48 bytes from poly_coeffs.
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// Layout: C_3, C_4 as doubles (read with ldfpd), then C_1 and C_2 as 16-byte extended values (read with ldfe)
+data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe // C_3, C_4: about 0.48090 and -0.36067
+data8 0xb8aa3b295c17f0bc, 0x00003fff // C_1: about 1.44270
+data8 0xb8aa3b295c17f0bc, 0x0000bffe // C_2: about -0.72135
+LOCAL_OBJECT_END(poly_coeffs)
+
+
+LOCAL_OBJECT_START(T_table)
+
+data8 0x3f671b0ea42e5fda, 0x3f815cfe8eaec830
+data8 0x3f8cfee70c5ce5dc, 0x3f94564a62192834
+data8 0x3f997723ace35766, 0x3f9f5923c69b54a1
+data8 0x3fa2a094a085d693, 0x3fa538941776b01e
+data8 0x3fa8324c9b914bc7, 0x3faacf54ce07d7e9
+data8 0x3fadced958dadc12, 0x3fb0387efbca869e
+data8 0x3fb18ac6067479c0, 0x3fb30edd3e13530d
+data8 0x3fb463c15936464e, 0x3fb5b9e13c3fa21d
+data8 0x3fb7113f3259e07a, 0x3fb869dd8d1b2035
+data8 0x3fb9c3bea49d3214, 0x3fbb1ee4d7961701
+data8 0x3fbc7b528b70f1c5, 0x3fbdd90a2c676ed4
+data8 0x3fbf05d4976c2028, 0x3fc032fbbaee6d65
+data8 0x3fc0e3b5a9f3284a, 0x3fc195195c7d125b
+data8 0x3fc22dadc2ab3497, 0x3fc2e050231df57d
+data8 0x3fc379f79c2b255b, 0x3fc42ddd2ba1b4a9
+data8 0x3fc4c89b9e6807f5, 0x3fc563dc29ffacb2
+data8 0x3fc619a25f5d798d, 0x3fc6b5ffbf367644
+data8 0x3fc752e1f660f8d6, 0x3fc7f049e753e7cf
+data8 0x3fc8a8980abfbd32, 0x3fc94724cca657be
+data8 0x3fc9e63a24971f46, 0x3fca85d8feb202f7
+data8 0x3fcb2602497d5346, 0x3fcbc6b6f5ee1c9b
+data8 0x3fcc67f7f770a67e, 0x3fcceec4b2234fba
+data8 0x3fcd91097ad13982, 0x3fce33dd57f3d335
+data8 0x3fced74146bc7b10, 0x3fcf7b3646fef683
+data8 0x3fd00223a943dc19, 0x3fd054a474bf0eb7
+data8 0x3fd0999d9b9259a1, 0x3fd0eca66d3b2581
+data8 0x3fd13ffa2e85b475, 0x3fd185a444fa0a7b
+data8 0x3fd1cb8312f27eff, 0x3fd21fa1441ce5e8
+data8 0x3fd265f526e603cb, 0x3fd2baa0c34be1ec
+data8 0x3fd3016b45de21ce, 0x3fd3486c38aa29a8
+data8 0x3fd38fa3efaa8262, 0x3fd3e562c0816a02
+data8 0x3fd42d141f53b646, 0x3fd474fd543f222c
+data8 0x3fd4bd1eb680e548, 0x3fd505789e234bd1
+data8 0x3fd54e0b64003b70, 0x3fd596d761c3c1f0
+data8 0x3fd5dfdcf1eeae0e, 0x3fd6291c6fd9329c
+data8 0x3fd6729637b59418, 0x3fd6bc4aa692e0fd
+data8 0x3fd7063a1a5fb4f2, 0x3fd75064f1ed0715
+data8 0x3fd79acb8cf10390, 0x3fd7d67c1e43ae5c
+data8 0x3fd8214f4068afa7, 0x3fd86c5f36dea3dc
+data8 0x3fd8b7ac64dd7f9d, 0x3fd8f4167a0c6f92
+data8 0x3fd93fd2d5e1bf1d, 0x3fd98bcd84296946
+data8 0x3fd9c8c333e6e9a5, 0x3fda152f142981b4
+data8 0x3fda527fd95fd8ff, 0x3fda9f5e3edeb9e6
+data8 0x3fdadd0b2b5755a7, 0x3fdb2a5d6f51ff83
+data8 0x3fdb686799b00be3, 0x3fdbb62f1b887cd8
+data8 0x3fdbf4979f666668, 0x3fdc332a6e8399d4
+data8 0x3fdc819dc2d45fe4, 0x3fdcc0908e19b7bd
+data8 0x3fdcffae611ad12b, 0x3fdd3ef776d43ff4
+data8 0x3fdd8e5002710128, 0x3fddcdfb486cb9a1
+data8 0x3fde0dd294245fe4, 0x3fde4dd622a28840
+data8 0x3fde8e06317114f0, 0x3fdece62fe9a9915
+data8 0x3fdf1f164a15389a, 0x3fdf5fd8a9063e35
+data8 0x3fdfa0c8937e7d5d, 0x3fdfe1e649bb6335
+data8 0x3fe011990641535a, 0x3fe032560e91e59e
+data8 0x3fe0532a5ebcd44a, 0x3fe0741617f5fc28
+data8 0x3fe08cd653f38839, 0x3fe0adeb55c1103b
+data8 0x3fe0cf181d5d1dd0, 0x3fe0f05ccd0aced7
+data8 0x3fe111b9875788ab, 0x3fe1332e6f1bcf73
+data8 0x3fe154bba77c2088, 0x3fe16df59bfa06c1
+data8 0x3fe18fadb6e2d3c2, 0x3fe1b17e849adc26
+data8 0x3fe1caeb6a0de814, 0x3fe1ece7c830eec9
+data8 0x3fe20efd3dae01df, 0x3fe2289de375d901
+data8 0x3fe24adf9b6a6fe0, 0x3fe26d3ad1aebcfc
+data8 0x3fe287100c2771f4, 0x3fe2a9983b3c1b28
+data8 0xbfda78e146f7bef4, 0xbfda33760a7f6051
+data8 0xbfd9ff43476fb5f7, 0xbfd9b97c3c4eec8f
+data8 0xbfd98504431717fc, 0xbfd93ee07535f967
+data8 0xbfd90a228d5712b2, 0xbfd8c3a104cb24f5
+data8 0xbfd88e9c72e0b226, 0xbfd847bc33d8618e
+data8 0xbfd812703988bb69, 0xbfd7dd0569c04bff
+data8 0xbfd7959c202292f1, 0xbfd75fe8d2c5d48f
+data8 0xbfd72a1637cbc183, 0xbfd6e221cd9d0cde
+data8 0xbfd6ac059985503b, 0xbfd675c99ce81f92
+data8 0xbfd63f6db2590482, 0xbfd5f6c138136489
+data8 0xbfd5c01a39fbd688, 0xbfd58952cf519193
+data8 0xbfd5526ad18493ce, 0xbfd51b6219bfe6ea
+data8 0xbfd4d1cdf8b4846f, 0xbfd49a784bcd1b8b
+data8 0xbfd4630161832547, 0xbfd42b6911cf5465
+data8 0xbfd3f3af3461e1c4, 0xbfd3bbd3a0a1dcfb
+data8 0xbfd383d62dac7ae7, 0xbfd34bb6b2546218
+data8 0xbfd313750520f520, 0xbfd2db10fc4d9aaf
+data8 0xbfd2a28a6dc90387, 0xbfd269e12f346e2c
+data8 0xbfd2311515e2e855, 0xbfd1f825f6d88e13
+data8 0xbfd1bf13a6c9c69f, 0xbfd185ddfa1a7ed0
+data8 0xbfd14c84c4dd6128, 0xbfd11307dad30b76
+data8 0xbfd0d9670f6941fe, 0xbfd09fa235ba2020
+data8 0xbfd0790adbb03009, 0xbfd03f09858c55fb
+data8 0xbfd004e3a7c97cbd, 0xbfcf9532288fcf69
+data8 0xbfcf205339208f27, 0xbfceab2a23a5b83e
+data8 0xbfce5ce55fdd37a5, 0xbfcde73fe3b1480f
+data8 0xbfcd714f44623927, 0xbfccfb1321b8c400
+data8 0xbfccac163c770dc9, 0xbfcc355b67195dd0
+data8 0xbfcbbe540a3f036f, 0xbfcb6ecf175f95e9
+data8 0xbfcaf74751e1be33, 0xbfca7f71fb7bab9d
+data8 0xbfca2f632320b86b, 0xbfc9b70ba539dfae
+data8 0xbfc93e6587910444, 0xbfc8edcae8352b6c
+data8 0xbfc874a0db01a719, 0xbfc7fb27199df16d
+data8 0xbfc7a9fec7d05ddf, 0xbfc72fff456ac70d
+data8 0xbfc6de7d66023dbc, 0xbfc663f6fac91316
+data8 0xbfc6121ac74813cf, 0xbfc5970c478fff4a
+data8 0xbfc51bab907a5c8a, 0xbfc4c93d33151b24
+data8 0xbfc44d527fdadf55, 0xbfc3fa87be0f3a1b
+data8 0xbfc3a797cd35d959, 0xbfc32ae9e278ae1a
+data8 0xbfc2d79c6937efdd, 0xbfc25a619370d9dc
+data8 0xbfc206b5bde2f8b8, 0xbfc188ecbd1d16be
+data8 0xbfc134e1b489062e, 0xbfc0b6894488e95f
+data8 0xbfc0621e2f556b5c, 0xbfc00d8c711a12cc
+data8 0xbfbf1cd21257e18c, 0xbfbe72ec117fa5b2
+data8 0xbfbdc8b7c49a1ddb, 0xbfbcc8d5e467b710
+data8 0xbfbc1ddc9c39c7a1, 0xbfbb7294093cdd0f
+data8 0xbfba7111df348494, 0xbfb9c501cdf75872
+data8 0xbfb918a16e46335b, 0xbfb81579a73e83c6
+data8 0xbfb7684f39f4ff2d, 0xbfb6bad3758efd87
+data8 0xbfb60d060d7e41ac, 0xbfb507b836033bb7
+data8 0xbfb4591d6310d85a, 0xbfb3aa2fdd27f1c3
+data8 0xbfb2faef55ccb372, 0xbfb1f3723b4ae6db
+data8 0xbfb14360d6136ffa, 0xbfb092fb594145c1
+data8 0xbfafc482e8b48a7e, 0xbfae6265ace11ae4
+data8 0xbfacff9e5c4341d0, 0xbfaaea3316095f72
+data8 0xbfa985bfc3495194, 0xbfa820a01ac754cb
+data8 0xbfa6bad3758efd87, 0xbfa554592bb8cd58
+data8 0xbfa3ed3094685a26, 0xbfa2855905ca70f6
+data8 0xbfa11cd1d5133413, 0xbf9dfd78881399f1
+data8 0xbf9b28f618cc85df, 0xbf98530faa3c087b
+data8 0xbf957bc3dddcd7fa, 0xbf92a3115322f9e6
+data8 0xbf8f91ed4eef8370, 0xbf89dae4ec6b8b2e
+data8 0xbf842106b1499209, 0xbf7cc89f97d67594
+data8 0xbf71497accf7e11d, 0x0000000000000000
+LOCAL_OBJECT_END(T_table)
+
+
+.section .text
+WEAK_LIBM_ENTRY(log2f)
+// float log2f(float): the argument arrives in f8 and the result is returned in f8.
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // y=frcpa(x)
+ frcpa.s1 f6,p0=f1,f8
+ // will form significand of 1.5 (to test whether the index is 128 or above)
+ mov r24=0xc
+}
+{.mfi
+ nop.m 0
+ // normalize x
+ fma.s1 f7=f8,f1,f0
+ // r2 = address of the linkage-table entry for poly_coeffs (C_1...C_4, then T_table)
+ addl r2 = @ltoff(poly_coeffs), gp;;
+}
+{.mfi
+ // get significand
+ getf.sig r25=f8
+ // f8 denormal ?
+ fclass.m p8,p10=f8,0x9
+ // will form significand of 1.5 (to test whether the index is 128 or above)
+ shl r24=r24,60
+}
+{.mfi
+ mov r26=0x804
+ nop.f 0
+ // r23=bias-1
+ mov r23=0xfffe;;
+}
+
+{.mmf
+ getf.exp r29=f8
+ // load start address of poly_coeffs (C_1...C_4 followed by T_table)
+ ld8 r2=[r2]
+ // will continue only for positive normal/denormal numbers
+ fclass.nm.unc p12,p7 = f8, 0x19 ;;
+}
+
+.pred.rel "mutex",p8,p10
+{.mfi
+ // denormal input, repeat get significand (after normalization)
+ (p8) getf.sig r25=f7
+ // x=1 ?
+ fcmp.eq.s0 p6,p0=f8,f1
+ // get T_index
+ (p10) shr.u r28=r25,63-8
+}
+{.mfi
+ // f12=0.5
+ setf.exp f12=r23
+ nop.f 0
+ // r27=bias
+ mov r27=0xffff;;
+}
+
+{.mfb
+ // denormal input, repeat get exponent (after normalization)
+ (p8) getf.exp r29=f7
+ nop.f 0
+ (p12) br.cond.spnt SPECIAL_log2f
+}
+{.mfi
+ cmp.geu p12,p0=r25,r24
+ nop.f 0
+ mov r23=0xff;;
+}
+// r3 = poly_coeffs+32 (address of C_2); r2 steps through C_3/C_4 and then C_1
+{.mfi
+ add r3=32,r2
+ // r=x*y-1 (fms computes f6*f8-f1)
+ fms.s1 f6=f6,f8,f1
+ // r26=0x80400...0 (threshold for using polynomial approximation)
+ shl r26=r26,64-12
+}
+{.mfi
+ // load C_3, C_4
+ ldfpd f10,f11=[r2],16
+ nop.f 0
+ // r27=bias-1 (if index >=128, will add exponent+1)
+ (p12) mov r27=0xfffe;;
+}
+
+{.mfi
+ // load C_1
+ ldfe f14=[r2],32
+ // x=1, return 0
+ (p6) fma.s.s0 f8=f0,f0,f0
+ (p8) shr.u r28=r25,63-8
+}
+{.mib
+ // load C_2
+ ldfe f13=[r3]
+ // r29=exponent-bias
+ sub r29=r29,r27
+ // x=1, return
+ (p6) br.ret.spnt b0;;
+}
+
+// fmerge.se below forms f7 = m: the significand of x with the sign and exponent of 1.0
+{.mfi
+ // get T_index
+ and r28=r28,r23
+ fmerge.se f7=f1,f7
+ // if first 9 bits after leading 1 are all zero, then p8=1
+ cmp.ltu p8,p12=r25,r26;;
+}
+{.mfi
+ // f8=expon - bias
+ setf.sig f8=r29
+ nop.f 0
+ // get T address (r2 + 8*T_index)
+ shladd r2=r28,3,r2
+}
+{.mfi
+ // first 8 bits after leading 1 are all ones ?
+ cmp.eq p10,p0=r23,r28
+ // if first 9 bits after leading 1 are 0 (p8), use polynomial approx. only with r=m-1
+ (p8) fms.s1 f6=f7,f1,f1
+ nop.i 0;;
+}
+{.mfi
+ //r26=1
+ mov r26=1
+ // if first 8 bits after leading 1 are all ones, use polynomial approx. only with r=m/2-1
+ (p10) fms.s1 f6=f7,f12,f1
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p12
+{.mmf
+ // load T (unless first 9 bits after leading 1 are 0)
+ (p12) ldfd f12=[r2]
+ nop.m 0
+ // set T=0 (if first 9 bits after leading 1 are 0)
+ (p8) fma.s1 f12=f0,f0,f0;;
+}
+// f12 now holds T (it held 0.5 until the load/clear above)
+{.mfi
+ nop.m 0
+ // P34=C_3+C_4*r
+ fma.s1 f10=f11,f6,f10
+ // r26=2^{63}
+ shl r26=r26,63
+}
+{.mfi
+ nop.m 0
+ // r2=r*r
+ fma.s1 f11=f6,f6,f0
+ nop.i 0;;
+}
+{.mfi
+ // significand of x is 1 ?
+ cmp.eq p0,p6=r25,r26
+ // P12=C_1+C_2*r
+ fma.s1 f14=f13,f6,f14
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // normalize additive term (l=exponent of x)
+ fcvt.xf f8=f8
+ // if significand(x)=1, return exponent (l)
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // add T+l
+ (p6) fma.s1 f8=f8,f1,f12
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P14=P12+r2*P34
+ (p6) fma.s1 f13=f10,f11,f14
+ nop.i 0;;
+}
+// p6 (significand not exactly 1) guards the final accumulation; otherwise f8 = l from fcvt.xf is returned
+{.mfb
+ nop.m 0
+ // result=T+l+r*P14
+ (p6) fma.s.s0 f8=f13,f6,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+SPECIAL_log2f:
+{.mfi
+ nop.m 0
+ // Inputs that are not positive normal/denormal arrive here. x=+Infinity ?
+ fclass.m p7,p0=f8,0x21
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // x=+/-Zero ?
+ fclass.m p8,p0=f8,0x7
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // x=-Infinity, -normal, -denormal ?
+ fclass.m p6,p0=f8,0x3a
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ // keep x in FR_X for __libm_error_region (f8 is overwritten below); log2f(+Infinity)=+Infinity
+ fmerge.s FR_X=f8,f8
+ (p7) br.ret.spnt b0;;
+}
+{.mfi
+ (p8) mov GR_Parameter_TAG = 172
+ // log2f(+/-0)=-infinity, raises Divide by Zero
+ // set f8=-0 (so that the frcpa below computes 1/(-0))
+ (p8) fmerge.ns f8=f0,f8
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ (p8) frcpa.s0 f8,p0=f1,f8
+ (p8) br.cond.sptk __libm_error_region;;
+}
+{.mfb
+ (p6) mov GR_Parameter_TAG = 173
+ // x<0: return NaN, raise Invalid (frcpa of 0/0)
+ (p6) frcpa.s0 f8,p0=f0,f0
+ (p6) br.cond.sptk __libm_error_region;;
+}
+
+
+{.mfb
+ nop.m 0
+ // Remaining cases: NaNs
+ fma.s.s0 f8=f8,f1,f0
+ br.ret.sptk b0;;
+}
+
+WEAK_LIBM_END(log2f)
+libm_alias_float_other (__log2, log2)
+#ifdef SHARED
+.symver log2f,log2f@@GLIBC_2.27
+.weak __log2f_compat
+.set __log2f_compat,__log2f
+.symver __log2f_compat,log2f@GLIBC_2.2
+#endif
+
+// Error path for the zero and negative cases: stores FR_X, FR_Y and FR_RESULT in a 64-byte frame, calls __libm_error_support, and returns the value reloaded from the result slot.
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address = old sp - 32 (new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack; pointer advances to sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp + 16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp + 48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 (the result) on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 address (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // address of the result slot (sp + 48)
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Reload the result slot after the call
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_log2f_data.c b/sysdeps/ia64/fpu/e_log2f_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log2f_data.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_log2l.S b/sysdeps/ia64/fpu/e_log2l.S
new file mode 100644
index 0000000000..8b97b2a408
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log2l.S
@@ -0,0 +1,815 @@
+.file "log2l.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 09/25/00 Initial version
+// 11/22/00 Fixed accuracy bug (for mantissas near 1, 2)
+// 12/07/00 Fixed C_1l constant, eliminated rounding errors in
+// reduced argument (x*frcpa(x)-1)
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//==============================================================
+// long double log2l(long double)
+//
+// Overview of operation
+//==============================================================
+// Background
+//
+// Implementation
+//
+// Let x = 2^l * m, where m=1.b1 b2 ... b8 b9 ... b52
+// y=frcpa(m), r=m*y-1, f=b1 b2 .. b8
+// T_high is a table that stores the 24 most significant bits of log2(1/y)
+// (in entries 1..255) in single precision format
+// T_low is a table that stores (log2(1/y)-T_high), rounded to double
+// precision
+//
+// f is used as an index; T_high[255]=T_low[255]=0
+//
+// If f=0 and b9=0, r is set to 2^{-8}* 0.b9 b10 ... b52 = m-1 (fractional part of m),
+// and 0 is used instead of T_high[0], T_low[0]
+// (polynomial evaluation only, for m=1+r, 0<=r<2^{-9})
+// If f=255, r is set to (m-2)/2 (T[255]=0, and only polynomial evaluation is used
+// for m=2(1-r'), 0<=r'<2^{-9})
+//
+// If 1+2^{-9}<=m<2-2^{-8} or (input not near 1), let C1r=(2^{16}+C1*r)-2^{16}
+// and let E=((RN(m*y)-1)-r)+(m*y-RN(m*y))
+// Else let C1r=C1*r (rounded to 64 significant bits) and let E=0
+//
+// Let D=C1*r-C1r
+//
+//
+// log2l(x) is approximated as
+// (l+T_high[f]+C1r) + (D+r*(c1+c2*r+c3*r^2...+c8*r^7)+(T_low[f]+C_1*E))
+//
+
+
+// Special values
+//==============================================================
+// log2l(0)=-inf, raises Divide by Zero
+// log2l(+inf)=inf
+// log2l(x)=NaN, raises Invalid if x<0
+//
+
+
+// Registers used
+//==============================================================
+// f6-f15, f32-f36, f41-f43
+// r2-r3, r23-r29, r32-r40
+// p6-p8, p10, p12, p13
+//
+
+
+GR_SAVE_B0 = r33 // r33-r36 are the 4 locals of alloc r32=ar.pfs,1,4,4,0
+GR_SAVE_PFS = r34 // holds ar.pfs across the error-handler call
+GR_SAVE_GP = r35 // This reg. can safely be used
+GR_SAVE_SP = r36 // defined but not referenced anywhere in this file
+
+GR_Parameter_X = r37 // r37-r40 are the 4 output registers of the alloc
+GR_Parameter_Y = r38 // address of the second stacked argument
+GR_Parameter_RESULT = r39 // address of the stacked result
+GR_Parameter_TAG = r40 // set to 168 (zero input) or 169 (negative input) before the error branch
+
+FR_X = f10 // copy of the original argument on the special paths (f10 holds C_7/P78 on the main path)
+FR_Y = f1 // f1 is stored as the second argument by __libm_error_region
+FR_RESULT = f8 // value returned to the caller
+
+
+
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(poly_coeffs)
+// Byte offsets from poly_coeffs are relied on by the post-increment loads in log2l
+data8 0xb8aa3b295c17f0bc, 0x00003fff // +0: C_1 (loaded with ldfe)
+data8 0x3fca61762a7aded9, 0xbfc71547652b82fe // +16: C_7, C_8 (loaded as a double pair)
+data8 0x3fd2776c50ef9bfe, 0xbfcec709dc3a03fd // +32: C_5, C_6 (loaded as a double pair)
+data8 0x3fdec709dc3a03fd, 0xbfd71547652b82fe // +48: C_3, C_4 (loaded as a double pair)
+//data8 0xd871319ff0342580, 0x0000bfbd // C_1l (low part of C1)
+data8 0x82f0025f2dc582ee, 0x0000bfbe // +64: C_1l (low part of C1, loaded with ldfe)
+data8 0xb8aa3b295c17f0bc, 0x0000bffe // +80: C_2 (loaded with ldfe)
+LOCAL_OBJECT_END(poly_coeffs)
+// T_table must start at +96 and T_low at +96+1024: log2l derives both addresses from this base
+
+
+
+LOCAL_OBJECT_START(T_table)
+
+data4 0x3b38d875, 0x3c0ae7f4, 0x3c67f738, 0x3ca2b253
+data4 0x3ccbb91d, 0x3cfac91e, 0x3d1504a5, 0x3d29c4a0
+data4 0x3d419264, 0x3d567aa6, 0x3d6e76ca, 0x3d81c3f7
+data4 0x3d8c5630, 0x3d9876e9, 0x3da31e0a, 0x3dadcf09
+data4 0x3db889f9, 0x3dc34eec, 0x3dce1df5, 0x3dd8f726
+data4 0x3de3da94, 0x3deec851, 0x3df82ea4, 0x3e0197dd
+data4 0x3e071dad, 0x3e0ca8ca, 0x3e116d6e, 0x3e170281
+data4 0x3e1bcfbc, 0x3e216ee9, 0x3e2644dc, 0x3e2b1ee1
+data4 0x3e30cd12, 0x3e35affd, 0x3e3a970f, 0x3e3f824f
+data4 0x3e4544c0, 0x3e4a3926, 0x3e4f31d1, 0x3e542ec7
+data4 0x3e593012, 0x3e5e35b7, 0x3e633fbf, 0x3e677625
+data4 0x3e6c884b, 0x3e719eea, 0x3e76ba0a, 0x3e7bd9b2
+data4 0x3e80111d, 0x3e82a523, 0x3e84ccec, 0x3e876533
+data4 0x3e89ffd1, 0x3e8c2d22, 0x3e8e5c18, 0x3e90fd0a
+data4 0x3e932fa9, 0x3e95d506, 0x3e980b5a, 0x3e9a4361
+data4 0x3e9c7d1f, 0x3e9f2b16, 0x3ea168a0, 0x3ea3a7ea
+data4 0x3ea5e8f5, 0x3ea82bc4, 0x3eaa705b, 0x3eacb6bb
+data4 0x3eaefee7, 0x3eb148e3, 0x3eb394b1, 0x3eb5e255
+data4 0x3eb831d0, 0x3eba8327, 0x3ebcd65c, 0x3ebeb3e0
+data4 0x3ec10a7a, 0x3ec362f9, 0x3ec5bd63, 0x3ec7a0b3
+data4 0x3ec9fe96, 0x3ecc5e6c, 0x3ece4619, 0x3ed0a978
+data4 0x3ed293fe, 0x3ed4faf1, 0x3ed6e859, 0x3ed952eb
+data4 0x3edb433c, 0x3eddb178, 0x3edfa4bc, 0x3ee19953
+data4 0x3ee40cee, 0x3ee60484, 0x3ee7fd73, 0x3ee9f7bb
+data4 0x3eec7280, 0x3eee6fda, 0x3ef06e94, 0x3ef26eb1
+data4 0x3ef47031, 0x3ef67317, 0x3ef8f8b2, 0x3efafec5
+data4 0x3efd0644, 0x3eff0f32, 0x3f008cc8, 0x3f0192b0
+data4 0x3f029952, 0x3f03a0b0, 0x3f0466b2, 0x3f056f5a
+data4 0x3f0678c0, 0x3f0782e6, 0x3f088dcc, 0x3f099973
+data4 0x3f0aa5dd, 0x3f0b6fac, 0x3f0c7d6d, 0x3f0d8bf4
+data4 0x3f0e575b, 0x3f0f673e, 0x3f1077e9, 0x3f1144ef
+data4 0x3f1256fc, 0x3f1369d6, 0x3f143880, 0x3f154cc1
+data4 0x3f161c7a, 0x3f173227, 0x3f1802f2, 0x3f191a0f
+data4 0x3f19ebee, 0x3f1b047e, 0x3f1bd775, 0x3f1cf17b
+data4 0x3f1dc58e, 0x3f1ee10f, 0x3f1fb63f, 0x3f208bea
+data4 0x3f21a98f, 0x3f22805c, 0x3f2357a7, 0x3f247778
+data4 0x3f254fe9, 0x3f2628d9, 0x3f270249, 0x3f2824fb
+data4 0x3f28ff97, 0x3f29dab4, 0x3f2ab654, 0x3f2b9277
+data4 0x3f2cb8c8, 0x3f2d961e, 0x3f2e73fa, 0x3f2f525b
+data4 0x3f303143, 0x3f3110b1, 0x3f31f0a7, 0x3f32d125
+data4 0x3f33b22b, 0x3f3493bc, 0x3f3575d6, 0x3f36587b
+data4 0x3f373bab, 0x3f381f68, 0x3f3903b1, 0x3f39e888
+data4 0x3f3acdec, 0x3f3bb3e0, 0x3f3c9a63, 0x3f3d8177
+data4 0x3f3e1bd4, 0x3f3f03d9, 0x3f3fec71, 0x3f40d59b
+data4 0x3f41bf59, 0x3f42a9ab, 0x3f434635, 0x3f443180
+data4 0x3f451d61, 0x3f4609d9, 0x3f46a7d3, 0x3f479549
+data4 0x3f488357, 0x3f492261, 0x3f4a1171, 0x3f4b011c
+data4 0x3f4ba139, 0x3f4c91e8, 0x3f4d8334, 0x3f4e246a
+data4 0x3f4f16be, 0x3f5009b1, 0x3f50ac02, 0x3f51a001
+data4 0x3f524305, 0x3f533812, 0x3f53dbca, 0x3f54d1e7
+data4 0x3f55c8a8, 0x3f566d85, 0x3f57655b, 0x3f580af0
+data4 0x3f58b0d0, 0x3f59aa2c, 0x3f5a50c7, 0x3f5b4b3c
+data4 0x3f5bf294, 0x3f5cee26, 0x3f5d963c, 0x3f5e92ed
+data4 0x3f5f3bc3, 0x3f5fe4e7, 0x3f60e32d, 0x3f618d13
+data4 0x3f623748, 0x3f63372a, 0x3f63e223, 0x3f648d6b
+data4 0x3f658eee, 0x3f663afe, 0x3f66e75e, 0x3f67ea86
+data4 0x3f6897b0, 0x3f69452c, 0x3f69f2f9, 0x3f6af847
+data4 0x3f6ba6e2, 0x3f6c55d0, 0x3f6d0510, 0x3f6e0c8d
+data4 0x3f6ebc9f, 0x3f6f6d04, 0x3f701dbe, 0x3f70cecd
+data4 0x3f718030, 0x3f728ae6, 0x3f733d20, 0x3f73efaf
+data4 0x3f74a296, 0x3f7555d3, 0x3f760967, 0x3f76bd53
+data4 0x3f777197, 0x3f7880a1, 0x3f7935c2, 0x3f79eb3c
+data4 0x3f7aa10f, 0x3f7b573b, 0x3f7c0dc2, 0x3f7cc4a3
+data4 0x3f7d7bdf, 0x3f7e3376, 0x3f7eeb68, 0x00000000
+LOCAL_OBJECT_END(T_table)
+
+
+
+LOCAL_OBJECT_START(T_low)
+
+
+data8 0x3dc0b97f689876ef, 0x3dfd5d906028ac01
+data8 0x3df8b9cbb8d7240b, 0x3de0c941a2f220cd
+data8 0x3e09c6aecba15936, 0x3dfa6d528241827c
+data8 0x3dd0bad25714903c, 0x3e2776b01dc036a2
+data8 0x3e2b914bc77f158b, 0x3e1c0fafd29dc74a
+data8 0x3e28dadc119cd3de, 0x3e3bca869da085be
+data8 0x3e19d1e700f2200a, 0x3e3e13530cc37504
+data8 0x3e3936464d9c41ee, 0x3e3c3fa21c9499d0
+data8 0x3e3259e079b6c6e8, 0x3e2a364069c4f7f3
+data8 0x3e1274c84f6c6364, 0x3e3796170159f454
+data8 0x3e26e1e389f4364e, 0x3e28cedda8c7f658
+data8 0x3e376c2028433268, 0x3e4aee6d650c82e1
+data8 0x3e33e65094fbeeb4, 0x3e4c7d125aa92c5d
+data8 0x3e1559a4b69691d8, 0x3e18efabeb7d7221
+data8 0x3e4c2b255abaa8de, 0x3e37436952a4538b
+data8 0x3e4e6807f4ba00b8, 0x3e33ff5964190e42
+data8 0x3e4f5d798cead43c, 0x3e4f3676443bf453
+data8 0x3e4660f8d5bc1bf5, 0x3e2d4f9f3ab04f36
+data8 0x3e357f7a64ccd537, 0x3e394caf7c9b05af
+data8 0x3e225c7d17ab29b0, 0x3e4eb202f6d55a12
+data8 0x3e32faa68b19bcd2, 0x3e45ee1c9b566a8b
+data8 0x3e4770a67de054ff, 0x3e42234fb9de6d6b
+data8 0x3e4ad139825c6e19, 0x3e47f3d334814a93
+data8 0x3e2af1ec402867b6, 0x3e2bfbda0c956e3d
+data8 0x3e4287b831e77ff2, 0x3e54bf0eb77f7b89
+data8 0x3e5b9259a1029607, 0x3e4a764b015e699d
+data8 0x3e4d0b68ea883ab5, 0x3e33e829ecdadf46
+data8 0x3e52f27efef3031b, 0x3e3073979e4af89e
+data8 0x3e3b980f2cd6c253, 0x3e2a5f0f5f7f66a9
+data8 0x3e37788738117b02, 0x3e58aa29a784d52f
+data8 0x3e4f5504c4ff2466, 0x3e002d40340fa647
+data8 0x3e5f53b64592f4c3, 0x3e543f222c526802
+data8 0x3e5680e547a872fa, 0x3e5e234bd1154450
+data8 0x3e3000edc18b6d21, 0x3e1c3c1f000942a8
+data8 0x3e51eeae0e442d6e, 0x3e4fb265376623f2
+data8 0x3e57b5941782d830, 0x3e3a4b83f24ae52c
+data8 0x3e5a5fb4f23978de, 0x3e51ed071563fb02
+data8 0x3e49e2071f51a7a8, 0x3e5e43ae5b924234
+data8 0x3dfa2be9aedf374a, 0x3e56dea3dbba67d5
+data8 0x3e3375fe732b3c3e, 0x3e5a0c6f91f2e77e
+data8 0x3e55e1bf1c969e41, 0x3e30a5a5166b8eee
+data8 0x3e53e6e9a539d46c, 0x3e542981b3d7b0e6
+data8 0x3e595fd8ff36ad64, 0x3e5edeb9e65cbbb4
+data8 0x3e46aeab4d3434c1, 0x3e4ea3ff0564b010
+data8 0x3e59b00be2e3c25a, 0x3e5b887cd7b0821f
+data8 0x3e5f666668547b4d, 0x3e4d0733a805273f
+data8 0x3e26a2ff21c4aec5, 0x3e4c336f7a3a78f3
+data8 0x3e11ad12b628e2d0, 0x3e56d43ff3f0ea64
+data8 0x3e238809433cccd2, 0x3e40d9734147d40f
+data8 0x3e54245fe3e24e06, 0x3e251441fce4d48c
+data8 0x3e517114efc5d1f9, 0x3e5e9a99154b0d82
+data8 0x3e442a71337970f8, 0x3e420c7c69211fdf
+data8 0x3e537e7d5d43c6a7, 0x3e4376c66ad9ad8b
+data8 0x3e49054d678a4f1c, 0x3e5d23cb3bc19f18
+data8 0x3e6ebcd449dcab2b, 0x3e67f5fc2849c88a
+data8 0x3e63f388395d3e84, 0x3e65c1103b0ad7e9
+data8 0x3e6d5d1dd031f353, 0x3e5a159dae75c4d0
+data8 0x3e4d5e22aa75f71d, 0x3e5e379ee62e1e35
+data8 0x3e4df082213cb2dc, 0x3e6bfa06c156f521
+data8 0x3e66e2d3c19b517b, 0x3e426b7098590071
+data8 0x3e541bd027e9854e, 0x3e5061dd924b0ac0
+data8 0x3e6dae01df373a03, 0x3e3baec80b207b0b
+data8 0x3e6b6a6fe06bebac, 0x3e61aebcfc3ab5d1
+data8 0x3e584ee3e7c79d83, 0x3e6b3c1b2840cb40
+data8 0x3e6c842085d6befd, 0x3e6ac04fd7b141e0
+data8 0x3e6c48250474141d, 0x3e2d889b86125f69
+data8 0x3e6e74740225dad0, 0x3e45940d31d50a7c
+data8 0x3e695476a6c39ddc, 0x3e6d9a6d857a060a
+data8 0x3e4a3e9bb4b69337, 0x3e484f3ce4707ed6
+data8 0x3e39dd125d25fc27, 0x3e563fb400de8732
+data8 0x3e5fdd6d0ee28b48, 0x3e669d15b869bb07
+data8 0x3e40687cfad7964d, 0x3e69317990d43957
+data8 0x3e633d57e24ae1bd, 0x3e618bf03710eabb
+data8 0x3e4b4df6fccd1160, 0x3e3fb26ddaa1ec45
+data8 0x3e3810a5e1817fd4, 0x3e6857373642fa5c
+data8 0x3e673db6193add31, 0x3e63200c8acbc9c3
+data8 0x3e3d2dee448ebb62, 0x3e6a19723a80db6a
+data8 0x3e5e7cdab8fd3e6a, 0x3e671855cd660672
+data8 0x3e473c3c78a85ecd, 0x3e5f5e23056a7cf2
+data8 0x3e52538519527367, 0x3e4b573bcf2580e9
+data8 0x3e6d6f856fe90c60, 0x3e2d932a8487642e
+data8 0x3e5236fc78b6174c, 0x3e50cb91d406db50
+data8 0x3e650e8bd562aa57, 0x3e424ee3d9a82f2e
+data8 0x3e59363960e1e3d9, 0x3e379604c1150a3e
+data8 0x3e6d914f6c2ac258, 0x3e62967a451a7b48
+data8 0x3e684b5f01139cb2, 0x3e448bbfbf6d292c
+data8 0x3e6227e7fb487e73, 0x3e6d39d50290f458
+data8 0x3e58368342b4b668, 0x3e65dc0c25bd1763
+data8 0x3e61b7dc362e22b5, 0x3e671691f094bb80
+data8 0x3e5011642d5123f2, 0x3e4c4eb7f11e41be
+data8 0x3e5dcee36ca242cf, 0x3e6791cefff688f1
+data8 0x3e60e23c8dda4ecd, 0x3e48e6a22fe78cfe
+data8 0x3e6d703f244adc86, 0x3e6a281a85a5049d
+data8 0x3e570f20e6403d9e, 0x3e2211518a12956f
+data8 0x3e6737d1e54d71df, 0x3e66b1881476f5e9
+data8 0x3e6e1bbeef085376, 0x3e47cad4944a32be
+data8 0x3e527f2c738e7ee9, 0x3e699883a4b9fb29
+data8 0x3e5c17d1108740d9, 0x3e5d4a9c79a43389
+data8 0x3e49fdc24462ba3b, 0x3e24dbb3a60cceb2
+data8 0x3e5c5bf618780748, 0x3e5c38005b0c778c
+data8 0x3e6be168dd6dd3fe, 0x3e633ab9370693b0
+data8 0x3dd290556b0ae339, 0x3e607c317927096a
+data8 0x3e59651353b3d90e, 0x3e4d8751e5e0ae0d
+data8 0x3e46c81023272a85, 0x3e6b23c988f391b2
+data8 0x3e608741d215209c, 0x3e60b8ba506d758f
+data8 0x3e62ddbe74803297, 0x3e5dbb8b5087587d
+data8 0x3e642aa529048131, 0x3e3dcbda6835dcf4
+data8 0x3e6db503ce854d2a, 0x3e6dd00b49bc6849
+data8 0x3e4db2f11243bc84, 0x3e3b9848efc2ea97
+data8 0x3e58f18e17c82609, 0x3e6ed8645e16c312
+data8 0x3e4065bdb60a5dd4, 0x3e490453c6e6c30a
+data8 0x3e62373994aa31ba, 0x3e56305f0e6b2a95
+data8 0x3e68c1601a6614ee, 0x3e614e204f19d93f
+data8 0x3e6e5037ca773299, 0x3e693f98892561a6
+data8 0x3e639de4f4bf700d, 0x3e416c071e93fd97
+data8 0x3e65466991b415ef, 0x3e6896a324afac9d
+data8 0x3e44f64802e2f11c, 0x3e64d7d747e2191a
+data8 0x3e6174b7581de84c, 0x3e44c7b946e1d43c
+data8 0x3e6a3bcbe30512ec, 0x3e5d3ed411c95ce4
+data8 0x3e3e5b5735cfaf8e, 0x3e6e538ab34efb51
+data8 0x3e514e204f19d93f, 0x3e5a88e6550c89a4
+data8 0x3e66b97a5d9dfd8b, 0x3e5f46b1e14ebaf3
+data8 0x3e357665f6893f5d, 0x3e6bbf633078d1d5
+data8 0x3e5e7337a212c417, 0x3e3570fde15fc8cc
+data8 0x3e21119402da92b4, 0x3e6566e830d1ff3b
+data8 0x3e558883e480e220, 0x3e589ca3a68da411
+data8 0x3e44eb66df73d648, 0x3e1a0a629b1b7e68
+data8 0x3e54cc207b8c1116, 0x0000000000000000
+LOCAL_OBJECT_END(T_low)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(log2l)
+// long double log2l(long double x): the argument is read from f8 and the result is returned in f8
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0 // frame: 1 input, 4 locals, 4 outputs
+ // normalize x
+ // y=frcpa(x)
+ frcpa.s1 f41,p0=f1,f8
+ // r26=bias-1
+ mov r26=0xfffe
+}
+{.mfi
+ // r23=bias+16
+ mov r23=0xffff+16
+ fma.s1 f7=f8,f1,f0 // f7 = x*1+0, the normalized copy of x
+ // r2 = address of the linkage-table slot for poly_coeffs (dereferenced by the ld8 below)
+ addl r2 = @ltoff(poly_coeffs), gp;;
+}
+{.mfi
+ // get significand
+ getf.sig r25=f8
+ // f8 denormal ?
+ fclass.m p8,p10=f8,0x9
+ // r24=bias-8 (r24 is not read again in this function)
+ mov r24=0xffff-8;;
+}
+{.mfi
+ setf.exp f36=r26 // f36=2^{-1} (r26=bias-1); used below when m is close to 2
+ nop.f 0
+ // r27=bias
+ mov r27=0xffff;;
+}
+
+{.mmf
+ getf.exp r29=f8
+ // load start address of poly_coeffs (C_1...C_8), which is followed by T_table and T_low
+ ld8 r2=[r2]
+ // will continue only for positive normal/unnormal numbers
+ fclass.m.unc p0,p12 = f8, 0x19;;
+}
+
+
+.pred.rel "mutex",p8,p10
+{.mfi
+ // denormal input, repeat get significand (after normalization)
+ (p8) getf.sig r25=f7
+ // x=1 ?
+ fcmp.eq.s0 p6,p0=f8,f1
+ // get T_index: significand shifted right by 55 (masked to 8 bits below)
+ (p10) shr.u r28=r25,63-8
+}
+{.mfi
+ // f32=2^16
+ setf.exp f32=r23
+ nop.f 0
+ mov r26=0x804;;
+}
+
+{.mfi
+ // denormal input, repeat get exponent (after normalization)
+ (p8) getf.exp r29=f7
+ // f33=0
+ mov f33=f0
+ // r26=0x80400...0 (threshold for using polynomial approximation)
+ shl r26=r26,64-12;;
+}
+
+{.mfb
+ add r3=16,r2 // r3 = poly_coeffs+16 (C_7, C_8)
+ // r=x*y-1
+ fms.s1 f6=f41,f8,f1
+ (p12) br.cond.spnt SPECIAL_log2l
+}
+{.mfi
+ // load C_1, advance r2 to C_3, C_4
+ ldfe f14=[r2],48
+ // RN(x*y)
+ fma.s1 f43=f41,f8,f0
+ mov r23=0xff;;
+}
+
+{.mmi
+ // load C_7, C_8
+ ldfpd f10,f11=[r3],16
+ // load C_3,C_4
+ ldfpd f15,f42=[r2],16
+ (p8) shr.u r28=r25,63-8;;
+}
+
+
+{.mfi
+ // load C_5, C_6
+ ldfpd f12,f13=[r3]
+ // pseudo-zero ?
+ fcmp.eq.s0 p7,p0=f7,f0
+ // if first 9 bits after leading 1 are all zero, then p8=1
+ cmp.ltu p8,p12=r25,r26
+}
+{.mfi
+ // load C1l
+ ldfe f34=[r2],16
+ fmerge.se f7=f1,f7 // f7 = m: significand of x with the sign and exponent of f1
+ // get T_index
+ and r28=r28,r23;;
+}
+{.mfi
+ // r29=exponent-bias
+ sub r29=r29,r27
+ // if first 9 bits after leading 1 are 0, r=m-1 and polynomial approx. only is used
+ (p8) fms.s1 f6=f7,f1,f1
+ // start address of T_low (r2 points at C_2 here, 16+1024 bytes before T_low)
+ add r3=1024+16,r2
+}
+{.mfi
+ // load C_2
+ ldfe f35=[r2],16
+ // x=1, return 0
+ (p6) fma.s0 f8=f0,f0,f0
+ // first 8 bits after leading 1 are all ones ?
+ cmp.eq p10,p0=r23,r28;;
+}
+
+{.mfb
+ // if first 8 bits after leading 1 are all ones, use polynomial approx. only
+ // add 1 to the exponent additive term, and estimate log2(1-r)
+ (p10) add r29=1,r29
+ nop.f 0
+ (p7) br.cond.spnt LOG2_PSEUDO_ZERO
+}
+{.mfi
+ // get T_low address
+ shladd r3=r28,3,r3
+ // first 8 bits after leading 1 are all ones: r=m*2^{-1}-1=(m-2)/2
+ (p10) fms.s1 f6=f7,f36,f1
+ // p10 --> p8=1, p12=0
+ (p10) cmp.eq p8,p12=r0,r0;;
+}
+
+{.mfi
+ // get T_high address
+ shladd r2=r28,2,r2
+ // L(x*y)=x*y-RN(x*y)
+ fms.s1 f41=f41,f8,f43
+ nop.i 0
+}
+{.mfi
+ // p13=p12 (snapshot taken before p12 is recomputed from the exponent below)
+ (p12) cmp.eq.unc p13,p0=r0,r0
+ // RtH=RN(x*y)-1 (will eliminate rounding errors in r)
+ fms.s1 f43=f43,f1,f1
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p12
+{.mfb
+ // load T_high (unless first 9 bits after leading 1 are 0)
+ (p12) ldfs f7=[r2]
+ // set T_high=0 (if first 9 bits after leading 1 are 0)
+ (p8) fma.s1 f7=f0,f0,f0
+ // x=1, return
+ (p6) br.ret.spnt b0
+}
+.pred.rel "mutex",p8,p12
+{.mfi
+ // p12: load T_low
+ (p12) ldfd f36=[r3]
+ // p8: set T_low=0; the cmp below keeps p8 only if l=0, otherwise it sets p12
+ (p8) fma.s1 f36=f0,f0,f0
+ (p8) cmp.eq p8,p12=r29,r0;; //nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p12
+{.mfi
+ // f8=expon - bias
+ setf.sig f8=r29
+ // general case: 2^{16}+C1*r
+ (p12) fma.s1 f33=f6,f14,f32
+ nop.i 0
+}
+{.mfi
+ // r26=1
+ mov r26=1
+ // p8 (mantissa close to 1 or 2, and l=0): C1*r+0 (f33 still holds 0 here)
+ (p8) fma.s1 f32=f6,f14,f33
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // P78=C_7+C_8*r
+ fma.s1 f10=f11,f6,f10
+ // r26=2^{63}
+ shl r26=r26,63
+}
+{.mfi
+ nop.m 0
+ // P34=C_3+r*C_4
+ fma.s1 f15=f42,f6,f15
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // r2=r*r (r squared, kept in f11; not the integer register r2)
+ fma.s1 f11=f6,f6,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P56=C_5+C_6*r
+ fma.s1 f13=f13,f6,f12
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // Rth-r
+ (p13) fms.s1 f43=f43,f1,f6
+ nop.i 0
+}
+{.mfi
+ // significand(x)=1 ? (p6 is set when it is NOT exactly 2^{63})
+ cmp.eq p0,p6=r25,r26
+ // P12=C1l+C_2*r
+ fma.s1 f34=f35,f6,f34
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p12
+{.mfi
+ nop.m 0
+ // p12: C1r=(2^{16}+C1*r)-2^{16}
+ (p12) fms.s1 f32=f33,f1,f32
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // p8: C1r=C1*r (double extended)
+ (p8) fms.s1 f32=f32,f1,f33
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // L(x*y)*C_1+T_low
+ (p13) fma.s1 f36=f41,f14,f36
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // P58=P56+r2*P78
+ fma.s1 f13=f11,f10,f13
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P14=P12+r2*P34
+ fma.s1 f15=f15,f11,f34
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r4=r2*r2
+ fma.s1 f11=f11,f11,f0
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // normalize additive term (l=exponent of x)
+ fcvt.xf f8=f8
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // D=C1*r-C1r
+ (p6) fms.s1 f12=f14,f6,f32
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // T_low'=(Rth-r)*C1+(L(x*y)*C1+T_low)
+ (p13) fma.s1 f36=f43,f14,f36
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // P18=P14+r4*P58
+ (p6) fma.s1 f13=f11,f13,f15
+ nop.i 0;;
+}
+
+{.mfi
+ nop.m 0
+ // add T_high+l
+ (p6) fma.s1 f8=f8,f1,f7
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // D+T_low
+ (p6) fma.s1 f12=f12,f1,f36
+ nop.i 0;;
+}
+
+
+{.mfi
+ nop.m 0
+ // (T_high+l)+C1r
+ (p6) fma.s1 f8=f8,f1,f32
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // (D+T_low)+r*P18
+ (p6) fma.s1 f13=f13,f6,f12
+ nop.i 0;;
+}
+
+//{.mfb
+//nop.m 0
+//mov f8=f36
+//fma.s0 f8=f13,f6,f0
+//br.ret.sptk b0;;
+//}
+
+
+{.mfb
+ nop.m 0
+ // result=((T_high+l)+C1r)+((D+T_low)+r*P18)
+ (p6) fma.s0 f8=f13,f1,f8
+ // return (when p6 is clear, f8 is just l as converted by fcvt.xf above)
+ br.ret.sptk b0;;
+}
+
+
+SPECIAL_log2l: // reached via p12: x is not a positive normal/unnormal number
+{.mfi
+ nop.m 0
+ mov FR_X=f8 // keep the original argument for __libm_error_region
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // x=+Infinity ?
+ fclass.m p7,p0=f8,0x21
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // x=+/-Zero ? (tested on f7, the normalized copy of x)
+ fclass.m p8,p0=f7,0x7
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ // x=-Infinity, -normal, -denormal ?
+ fclass.m p6,p0=f8,0x3a
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ // log2l(+Infinity)=+Infinity (f8 is returned unchanged)
+ nop.f 0
+ (p7) br.ret.spnt b0;;
+}
+{.mfi
+ (p8) mov GR_Parameter_TAG = 168 // tag used for the zero-argument case
+ // log2l(+/-0)=-infinity, raises Divide by Zero
+ // set f8=-0
+ (p8) fmerge.ns f8=f0,f8
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ (p8) frcpa.s0 f8,p0=f1,f8 // reciprocal of -0 under s0 produces the result and the flag
+ (p8) br.cond.sptk __libm_error_region;;
+}
+{.mfb
+ (p6) mov GR_Parameter_TAG = 169 // tag used for the negative-argument case
+ // x<0: return NaN, raise Invalid
+ (p6) frcpa.s0 f8,p0=f0,f0
+ (p6) br.cond.sptk __libm_error_region;;
+}
+
+
+{.mfb
+ nop.m 0
+ // Remaining cases: NaNs (x*1+0 under s0 is returned)
+ fma.s0 f8=f8,f1,f0
+ br.ret.sptk b0;;
+}
+
+LOG2_PSEUDO_ZERO:
+// Reached via p7: x passed the positive normal/unnormal test but its normalized copy f7 equals 0
+{.mfi
+ nop.m 0
+ mov FR_X=f8 // keep the original argument for __libm_error_region
+ nop.i 0
+}
+{.mfi
+ mov GR_Parameter_TAG = 168 // same tag as the +/-0 case in SPECIAL_log2l
+ // log2l(+/-0)=-infinity, raises Divide by Zero
+ // set f8=-0
+ fmerge.ns f8=f0,f8
+ nop.i 0;;
+}
+{.mfb
+ nop.m 0
+ frcpa.s0 f8,p0=f1,f8 // reciprocal of -0 under s0 produces the result and the flag
+ br.cond.sptk __libm_error_region;;
+}
+
+
+GLOBAL_IEEE754_END(log2l)
+libm_alias_ldouble_other (__log2, log2)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // Error path: spill X, Y, RESULT to a 64-byte frame, call __libm_error_support, reload f8
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (entry sp-32, i.e. new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 (FR_Y is f1) at sp+32, then advance pointer to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // STORE Parameter 1 (original argument) on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 at sp+48 (pointer value before the add below)
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32) for the call
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address (sp+48) after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to the caller of log2l
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_log_data.c b/sysdeps/ia64/fpu/e_log_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_log_data.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_logf.S b/sysdeps/ia64/fpu/e_logf.S
new file mode 100644
index 0000000000..f317c567da
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logf.S
@@ -0,0 +1,1165 @@
+.file "logf.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 03/01/00 Initial version
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 01/10/01 Improved speed, fixed flags for neg denormals
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 05/23/02 Modified algorithm. Now only one polynomial is used
+// for |x-1| >= 1/256 and for |x-1| < 1/256
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//==============================================================
+// float logf(float)
+// float log10f(float)
+//
+//
+// Overview of operation
+//==============================================================
+// Background
+// ----------
+//
+// This algorithm is based on the fact that
+// log(a b) = log(a) + log(b).
+//
+// In our case we have x = 2^N f, where 1 <= f < 2.
+// So
+// log(x) = log(2^N f) = log(2^N) + log(f) = n*log(2) + log(f)
+//
+// To calculate log(f) we do following
+// log(f) = log(f * frcpa(f) / frcpa(f)) =
+// = log(f * frcpa(f)) + log(1/frcpa(f))
+//
+// According to definition of IA-64's frcpa instruction it's a
+// floating point that approximates 1/f using a lookup on the
+// top of 8 bits of the input number's significand with relative
+// error < 2^(-8.886). So we have following
+//
+// |(1/f - frcpa(f)) / (1/f))| = |1 - f*frcpa(f)| < 1/256
+//
+// and
+//
+// log(f) = log(f * frcpa(f)) + log(1/frcpa(f)) =
+// = log(1 + r) + T
+//
+// The first value can be computed by polynomial P(r) approximating
+// log(1 + r) on |r| < 1/256 and the second is precomputed tabular
+// value defined by top 8 bit of f.
+//
+// Finally we have that log(x) ~ (N*log(2) + T) + P(r)
+//
+// Note that if input argument is close to 1.0 (in our case it means
+// that |1 - x| < 1/256) we can use just polynomial approximation
+// because x = 2^0 * f = f = 1 + r and
+// log(x) = log(1 + r) ~ P(r)
+//
+//
+// To compute log10(x) we just use identity:
+//
+// log10(x) = log(x)/log(10)
+//
+// so we have that
+//
+// log10(x) = (N*log(2) + T + log(1+r)) / log(10) =
+// = N*(log(2)/log(10)) + (T/log(10)) + log(1 + r)/log(10)
+//
+//
+// Implementation
+// --------------
+// It can be seen that the formulas for log and log10 differ from one another
+// only by coefficients and tabular values. Namely, both log and log10 are
+// calculated as (N*L1 + T) + L2*Series(r) where in case of log
+// L1 = log(2)
+// T = log(1/frcpa(x))
+// L2 = 1.0
+// and in case of log10
+// L1 = log(2)/log(10)
+// T = log(1/frcpa(x))/log(10)
+// L2 = 1.0/log(10)
+//
+// So common code can be used, with two different entry points that set
+// pointers to the base address of the corresponding data set containing
+// the values of L2 and T, and prepare the integer representation of L1
+// needed for the following setf instruction.
+//
+// Note that both log and log10 use a common approximation polynomial,
+// which means we need only one set of approximation coefficients.
+//
+// 1. Computation of log(x) for |x-1| >= 1/256
+// InvX = frcpa(x)
+// r = InvX*x - 1
+// P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
+// A4,A3,A2 are created with setf instruction.
+// We use Taylor series and so A4 = 1/4, A3 = 1/3,
+// A2 = 1/2 rounded to double.
+//
+// N = float(n) where n is true unbiased exponent of x
+//
+// T is the tabular value of log(1/frcpa(x)) calculated in quad precision
+// and rounded to double. To locate T we take bits 55 to 62 of the register
+// format significand of x as the index and calculate the address
+// ad_T = table_base_addr + 8 * index
+//
+// L2 (1.0 or 1.0/log(10) depending on function) is calculated in quad
+// precision and rounded to double; it's loaded from memory
+//
+// L1 (log(2) or log10(2) depending on function) is calculated in quad
+// precision and rounded to double; it's created with setf.
+//
+// And final result = P2(r)*(r*L2) + (T + N*L1)
+//
+//
+// 2. Computation of log(x) for |x-1| < 1/256
+// r = x - 1
+// P(r) = r*((1 - A2*r) + r^2*(A3 - A4*r)) = r*P2(r),
+// A4,A3,A2 are the same as in case |x-1| >= 1/256
+//
+// And final result = P2(r)*(r*L2)
+//
+// 3. How we determine whether the input argument satisfies |x-1| < 1/256.
+//
+// To do it we analyze biased exponent and significand of input argument.
+//
+// a) First we test whether the biased exponent equals 0xFFFE or 0xFFFF
+// (i.e. whether 0.5 <= x < 2). This comparison can be performed using
+// the unsigned version of the cmp instruction in such a way
+// biased_exponent_of_x - 0xFFFE < 2
+//
+//
+// b) Second (when the result of a) is true) we need to compare x
+// with 1-1/256 and 1+1/256, or in register format representation with
+// 0xFFFEFF00000000000000 and 0xFFFF8080000000000000 respectively.
+// Since the biased exponent of x here can only be 0xFFFE or 0xFFFF,
+// we need to test only its last bit. Also the significand always
+// has its leading bit set to 1, so that bit can be excluded from comparison.
+// Thus it is enough to generate a 64-bit integer ix whose bits are
+// ix[63] = biased_exponent_of_x[0] and ix[62-0] = significand_of_x[62-0]
+// and compare it with 0x7F00000000000000 and 0x8080000000000000 (those
+// are obtained like ix from the register representations of 255/256 and
+// 257/256). This comparison can be made as in a), using the unsigned
+// version of cmp, i.e. ix - 0x7F00000000000000 < 0x0180000000000000.
+// 0x0180000000000000 is the difference between 0x8080000000000000 and
+// 0x7F00000000000000.
+//
+// Note: NaT, any NaNs, +/-INF, +/-0, negatives and unnormalized numbers are
+// filtered and processed on special branches.
+//
+//
+// Special values
+//==============================================================
+//
+// logf(+0) = -inf
+// logf(-0) = -inf
+//
+// logf(+qnan) = +qnan
+// logf(-qnan) = -qnan
+// logf(+snan) = +qnan
+// logf(-snan) = -qnan
+//
+// logf(-n) = QNAN Indefinite
+// logf(-inf) = QNAN Indefinite
+//
+// logf(+inf) = +inf
+//
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input
+// f12 -> f14, f33 -> f39
+//
+// General registers used:
+// r8 -> r11
+// r14 -> r19
+//
+// Predicate registers used:
+// p6 -> p12
+
+
+// Assembly macros
+//==============================================================
+
+GR_TAG = r8
+GR_ad_T = r8 // shares r8 with GR_TAG
+GR_N = r9
+GR_Exp = r10
+GR_Sig = r11
+
+GR_025 = r14
+GR_05 = r15
+GR_A3 = r16
+GR_Ind = r17
+GR_dx = r15 // shares r15 with GR_05
+GR_Ln2 = r19
+GR_de = r20 // NOTE(review): r20-r22 are not listed in the Registers used comment above
+GR_x = r21
+GR_xorg = r22
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_A2 = f12 // A2, A3, A4: coefficients of P(r) described in the overview
+FR_A3 = f13
+FR_A4 = f14
+
+FR_RcpX = f33
+FR_r = f34
+FR_r2 = f35
+FR_tmp = f35 // shares f35 with FR_r2
+FR_Ln2 = f36
+FR_T = f37
+FR_N = f38
+FR_NxLn2pT = f38 // shares f38 with FR_N
+FR_NormX = f39
+FR_InvLn10 = f40 // NOTE(review): f40 is not listed in the Registers used comment above
+
+
+FR_Y = f1
+FR_X = f10
+FR_RESULT = f8
+
+
+// Data tables
+//==============================================================
+RODATA
+.align 16
+LOCAL_OBJECT_START(logf_data)
+data8 0x3FF0000000000000 // 1.0
+//
+// ln(1/frcpa(1+i/256)), i=0...255
+data8 0x3F60040155D5889E // 0
+data8 0x3F78121214586B54 // 1
+data8 0x3F841929F96832F0 // 2
+data8 0x3F8C317384C75F06 // 3
+data8 0x3F91A6B91AC73386 // 4
+data8 0x3F95BA9A5D9AC039 // 5
+data8 0x3F99D2A8074325F4 // 6
+data8 0x3F9D6B2725979802 // 7
+data8 0x3FA0C58FA19DFAAA // 8
+data8 0x3FA2954C78CBCE1B // 9
+data8 0x3FA4A94D2DA96C56 // 10
+data8 0x3FA67C94F2D4BB58 // 11
+data8 0x3FA85188B630F068 // 12
+data8 0x3FAA6B8ABE73AF4C // 13
+data8 0x3FAC441E06F72A9E // 14
+data8 0x3FAE1E6713606D07 // 15
+data8 0x3FAFFA6911AB9301 // 16
+data8 0x3FB0EC139C5DA601 // 17
+data8 0x3FB1DBD2643D190B // 18
+data8 0x3FB2CC7284FE5F1C // 19
+data8 0x3FB3BDF5A7D1EE64 // 20
+data8 0x3FB4B05D7AA012E0 // 21
+data8 0x3FB580DB7CEB5702 // 22
+data8 0x3FB674F089365A7A // 23
+data8 0x3FB769EF2C6B568D // 24
+data8 0x3FB85FD927506A48 // 25
+data8 0x3FB9335E5D594989 // 26
+data8 0x3FBA2B0220C8E5F5 // 27
+data8 0x3FBB0004AC1A86AC // 28
+data8 0x3FBBF968769FCA11 // 29
+data8 0x3FBCCFEDBFEE13A8 // 30
+data8 0x3FBDA727638446A2 // 31
+data8 0x3FBEA3257FE10F7A // 32
+data8 0x3FBF7BE9FEDBFDE6 // 33
+data8 0x3FC02AB352FF25F4 // 34
+data8 0x3FC097CE579D204D // 35
+data8 0x3FC1178E8227E47C // 36
+data8 0x3FC185747DBECF34 // 37
+data8 0x3FC1F3B925F25D41 // 38
+data8 0x3FC2625D1E6DDF57 // 39
+data8 0x3FC2D1610C86813A // 40
+data8 0x3FC340C59741142E // 41
+data8 0x3FC3B08B6757F2A9 // 42
+data8 0x3FC40DFB08378003 // 43
+data8 0x3FC47E74E8CA5F7C // 44
+data8 0x3FC4EF51F6466DE4 // 45
+data8 0x3FC56092E02BA516 // 46
+data8 0x3FC5D23857CD74D5 // 47
+data8 0x3FC6313A37335D76 // 48
+data8 0x3FC6A399DABBD383 // 49
+data8 0x3FC70337DD3CE41B // 50
+data8 0x3FC77654128F6127 // 51
+data8 0x3FC7E9D82A0B022D // 52
+data8 0x3FC84A6B759F512F // 53
+data8 0x3FC8AB47D5F5A310 // 54
+data8 0x3FC91FE49096581B // 55
+data8 0x3FC981634011AA75 // 56
+data8 0x3FC9F6C407089664 // 57
+data8 0x3FCA58E729348F43 // 58
+data8 0x3FCABB55C31693AD // 59
+data8 0x3FCB1E104919EFD0 // 60
+data8 0x3FCB94EE93E367CB // 61
+data8 0x3FCBF851C067555F // 62
+data8 0x3FCC5C0254BF23A6 // 63
+data8 0x3FCCC000C9DB3C52 // 64
+data8 0x3FCD244D99C85674 // 65
+data8 0x3FCD88E93FB2F450 // 66
+data8 0x3FCDEDD437EAEF01 // 67
+data8 0x3FCE530EFFE71012 // 68
+data8 0x3FCEB89A1648B971 // 69
+data8 0x3FCF1E75FADF9BDE // 70
+data8 0x3FCF84A32EAD7C35 // 71
+data8 0x3FCFEB2233EA07CD // 72
+data8 0x3FD028F9C7035C1C // 73
+data8 0x3FD05C8BE0D9635A // 74
+data8 0x3FD085EB8F8AE797 // 75
+data8 0x3FD0B9C8E32D1911 // 76
+data8 0x3FD0EDD060B78081 // 77
+data8 0x3FD122024CF0063F // 78
+data8 0x3FD14BE2927AECD4 // 79
+data8 0x3FD180618EF18ADF // 80
+data8 0x3FD1B50BBE2FC63B // 81
+data8 0x3FD1DF4CC7CF242D // 82
+data8 0x3FD214456D0EB8D4 // 83
+data8 0x3FD23EC5991EBA49 // 84
+data8 0x3FD2740D9F870AFB // 85
+data8 0x3FD29ECDABCDFA04 // 86
+data8 0x3FD2D46602ADCCEE // 87
+data8 0x3FD2FF66B04EA9D4 // 88
+data8 0x3FD335504B355A37 // 89
+data8 0x3FD360925EC44F5D // 90
+data8 0x3FD38BF1C3337E75 // 91
+data8 0x3FD3C25277333184 // 92
+data8 0x3FD3EDF463C1683E // 93
+data8 0x3FD419B423D5E8C7 // 94
+data8 0x3FD44591E0539F49 // 95
+data8 0x3FD47C9175B6F0AD // 96
+data8 0x3FD4A8B341552B09 // 97
+data8 0x3FD4D4F3908901A0 // 98
+data8 0x3FD501528DA1F968 // 99
+data8 0x3FD52DD06347D4F6 // 100
+data8 0x3FD55A6D3C7B8A8A // 101
+data8 0x3FD5925D2B112A59 // 102
+data8 0x3FD5BF406B543DB2 // 103
+data8 0x3FD5EC433D5C35AE // 104
+data8 0x3FD61965CDB02C1F // 105
+data8 0x3FD646A84935B2A2 // 106
+data8 0x3FD6740ADD31DE94 // 107
+data8 0x3FD6A18DB74A58C5 // 108
+data8 0x3FD6CF31058670EC // 109
+data8 0x3FD6F180E852F0BA // 110
+data8 0x3FD71F5D71B894F0 // 111
+data8 0x3FD74D5AEFD66D5C // 112
+data8 0x3FD77B79922BD37E // 113
+data8 0x3FD7A9B9889F19E2 // 114
+data8 0x3FD7D81B037EB6A6 // 115
+data8 0x3FD8069E33827231 // 116
+data8 0x3FD82996D3EF8BCB // 117
+data8 0x3FD85855776DCBFB // 118
+data8 0x3FD8873658327CCF // 119
+data8 0x3FD8AA75973AB8CF // 120
+data8 0x3FD8D992DC8824E5 // 121
+data8 0x3FD908D2EA7D9512 // 122
+data8 0x3FD92C59E79C0E56 // 123
+data8 0x3FD95BD750EE3ED3 // 124
+data8 0x3FD98B7811A3EE5B // 125
+data8 0x3FD9AF47F33D406C // 126
+data8 0x3FD9DF270C1914A8 // 127
+data8 0x3FDA0325ED14FDA4 // 128
+data8 0x3FDA33440224FA79 // 129
+data8 0x3FDA57725E80C383 // 130
+data8 0x3FDA87D0165DD199 // 131
+data8 0x3FDAAC2E6C03F896 // 132
+data8 0x3FDADCCC6FDF6A81 // 133
+data8 0x3FDB015B3EB1E790 // 134
+data8 0x3FDB323A3A635948 // 135
+data8 0x3FDB56FA04462909 // 136
+data8 0x3FDB881AA659BC93 // 137
+data8 0x3FDBAD0BEF3DB165 // 138
+data8 0x3FDBD21297781C2F // 139
+data8 0x3FDC039236F08819 // 140
+data8 0x3FDC28CB1E4D32FD // 141
+data8 0x3FDC4E19B84723C2 // 142
+data8 0x3FDC7FF9C74554C9 // 143
+data8 0x3FDCA57B64E9DB05 // 144
+data8 0x3FDCCB130A5CEBB0 // 145
+data8 0x3FDCF0C0D18F326F // 146
+data8 0x3FDD232075B5A201 // 147
+data8 0x3FDD490246DEFA6B // 148
+data8 0x3FDD6EFA918D25CD // 149
+data8 0x3FDD9509707AE52F // 150
+data8 0x3FDDBB2EFE92C554 // 151
+data8 0x3FDDEE2F3445E4AF // 152
+data8 0x3FDE148A1A2726CE // 153
+data8 0x3FDE3AFC0A49FF40 // 154
+data8 0x3FDE6185206D516E // 155
+data8 0x3FDE882578823D52 // 156
+data8 0x3FDEAEDD2EAC990C // 157
+data8 0x3FDED5AC5F436BE3 // 158
+data8 0x3FDEFC9326D16AB9 // 159
+data8 0x3FDF2391A2157600 // 160
+data8 0x3FDF4AA7EE03192D // 161
+data8 0x3FDF71D627C30BB0 // 162
+data8 0x3FDF991C6CB3B379 // 163
+data8 0x3FDFC07ADA69A910 // 164
+data8 0x3FDFE7F18EB03D3E // 165
+data8 0x3FE007C053C5002E // 166
+data8 0x3FE01B942198A5A1 // 167
+data8 0x3FE02F74400C64EB // 168
+data8 0x3FE04360BE7603AD // 169
+data8 0x3FE05759AC47FE34 // 170
+data8 0x3FE06B5F1911CF52 // 171
+data8 0x3FE078BF0533C568 // 172
+data8 0x3FE08CD9687E7B0E // 173
+data8 0x3FE0A10074CF9019 // 174
+data8 0x3FE0B5343A234477 // 175
+data8 0x3FE0C974C89431CE // 176
+data8 0x3FE0DDC2305B9886 // 177
+data8 0x3FE0EB524BAFC918 // 178
+data8 0x3FE0FFB54213A476 // 179
+data8 0x3FE114253DA97D9F // 180
+data8 0x3FE128A24F1D9AFF // 181
+data8 0x3FE1365252BF0865 // 182
+data8 0x3FE14AE558B4A92D // 183
+data8 0x3FE15F85A19C765B // 184
+data8 0x3FE16D4D38C119FA // 185
+data8 0x3FE18203C20DD133 // 186
+data8 0x3FE196C7BC4B1F3B // 187
+data8 0x3FE1A4A738B7A33C // 188
+data8 0x3FE1B981C0C9653D // 189
+data8 0x3FE1CE69E8BB106B // 190
+data8 0x3FE1DC619DE06944 // 191
+data8 0x3FE1F160A2AD0DA4 // 192
+data8 0x3FE2066D7740737E // 193
+data8 0x3FE2147DBA47A394 // 194
+data8 0x3FE229A1BC5EBAC3 // 195
+data8 0x3FE237C1841A502E // 196
+data8 0x3FE24CFCE6F80D9A // 197
+data8 0x3FE25B2C55CD5762 // 198
+data8 0x3FE2707F4D5F7C41 // 199
+data8 0x3FE285E0842CA384 // 200
+data8 0x3FE294294708B773 // 201
+data8 0x3FE2A9A2670AFF0C // 202
+data8 0x3FE2B7FB2C8D1CC1 // 203
+data8 0x3FE2C65A6395F5F5 // 204
+data8 0x3FE2DBF557B0DF43 // 205
+data8 0x3FE2EA64C3F97655 // 206
+data8 0x3FE3001823684D73 // 207
+data8 0x3FE30E97E9A8B5CD // 208
+data8 0x3FE32463EBDD34EA // 209
+data8 0x3FE332F4314AD796 // 210
+data8 0x3FE348D90E7464D0 // 211
+data8 0x3FE35779F8C43D6E // 212
+data8 0x3FE36621961A6A99 // 213
+data8 0x3FE37C299F3C366A // 214
+data8 0x3FE38AE2171976E7 // 215
+data8 0x3FE399A157A603E7 // 216
+data8 0x3FE3AFCCFE77B9D1 // 217
+data8 0x3FE3BE9D503533B5 // 218
+data8 0x3FE3CD7480B4A8A3 // 219
+data8 0x3FE3E3C43918F76C // 220
+data8 0x3FE3F2ACB27ED6C7 // 221
+data8 0x3FE4019C2125CA93 // 222
+data8 0x3FE4181061389722 // 223
+data8 0x3FE42711518DF545 // 224
+data8 0x3FE436194E12B6BF // 225
+data8 0x3FE445285D68EA69 // 226
+data8 0x3FE45BCC464C893A // 227
+data8 0x3FE46AED21F117FC // 228
+data8 0x3FE47A1527E8A2D3 // 229
+data8 0x3FE489445EFFFCCC // 230
+data8 0x3FE4A018BCB69835 // 231
+data8 0x3FE4AF5A0C9D65D7 // 232
+data8 0x3FE4BEA2A5BDBE87 // 233
+data8 0x3FE4CDF28F10AC46 // 234
+data8 0x3FE4DD49CF994058 // 235
+data8 0x3FE4ECA86E64A684 // 236
+data8 0x3FE503C43CD8EB68 // 237
+data8 0x3FE513356667FC57 // 238
+data8 0x3FE522AE0738A3D8 // 239
+data8 0x3FE5322E26867857 // 240
+data8 0x3FE541B5CB979809 // 241
+data8 0x3FE55144FDBCBD62 // 242
+data8 0x3FE560DBC45153C7 // 243
+data8 0x3FE5707A26BB8C66 // 244
+data8 0x3FE587F60ED5B900 // 245
+data8 0x3FE597A7977C8F31 // 246
+data8 0x3FE5A760D634BB8B // 247
+data8 0x3FE5B721D295F10F // 248
+data8 0x3FE5C6EA94431EF9 // 249
+data8 0x3FE5D6BB22EA86F6 // 250
+data8 0x3FE5E6938645D390 // 251
+data8 0x3FE5F673C61A2ED2 // 252
+data8 0x3FE6065BEA385926 // 253
+data8 0x3FE6164BFA7CC06B // 254
+data8 0x3FE62643FECF9743 // 255
+LOCAL_OBJECT_END(logf_data)
+
+LOCAL_OBJECT_START(log10f_data)
+data8 0x3FDBCB7B1526E50E // 1/ln(10)
+//
+// ln(1/frcpa(1+i/256))/ln(10), i=0...255
+data8 0x3F4BD27045BFD025 // 0
+data8 0x3F64E84E793A474A // 1
+data8 0x3F7175085AB85FF0 // 2
+data8 0x3F787CFF9D9147A5 // 3
+data8 0x3F7EA9D372B89FC8 // 4
+data8 0x3F82DF9D95DA961C // 5
+data8 0x3F866DF172D6372C // 6
+data8 0x3F898D79EF5EEDF0 // 7
+data8 0x3F8D22ADF3F9579D // 8
+data8 0x3F9024231D30C398 // 9
+data8 0x3F91F23A98897D4A // 10
+data8 0x3F93881A7B818F9E // 11
+data8 0x3F951F6E1E759E35 // 12
+data8 0x3F96F2BCE7ADC5B4 // 13
+data8 0x3F988D362CDF359E // 14
+data8 0x3F9A292BAF010982 // 15
+data8 0x3F9BC6A03117EB97 // 16
+data8 0x3F9D65967DE3AB09 // 17
+data8 0x3F9F061167FC31E8 // 18
+data8 0x3FA05409E4F7819C // 19
+data8 0x3FA125D0432EA20E // 20
+data8 0x3FA1F85D440D299B // 21
+data8 0x3FA2AD755749617D // 22
+data8 0x3FA381772A00E604 // 23
+data8 0x3FA45643E165A70B // 24
+data8 0x3FA52BDD034475B8 // 25
+data8 0x3FA5E3966B7E9295 // 26
+data8 0x3FA6BAAF47C5B245 // 27
+data8 0x3FA773B3E8C4F3C8 // 28
+data8 0x3FA84C51EBEE8D15 // 29
+data8 0x3FA906A6786FC1CB // 30
+data8 0x3FA9C197ABF00DD7 // 31
+data8 0x3FAA9C78712191F7 // 32
+data8 0x3FAB58C09C8D637C // 33
+data8 0x3FAC15A8BCDD7B7E // 34
+data8 0x3FACD331E2C2967C // 35
+data8 0x3FADB11ED766ABF4 // 36
+data8 0x3FAE70089346A9E6 // 37
+data8 0x3FAF2F96C6754AEE // 38
+data8 0x3FAFEFCA8D451FD6 // 39
+data8 0x3FB0585283764178 // 40
+data8 0x3FB0B913AAC7D3A7 // 41
+data8 0x3FB11A294F2569F6 // 42
+data8 0x3FB16B51A2696891 // 43
+data8 0x3FB1CD03ADACC8BE // 44
+data8 0x3FB22F0BDD7745F5 // 45
+data8 0x3FB2916ACA38D1E8 // 46
+data8 0x3FB2F4210DF7663D // 47
+data8 0x3FB346A6C3C49066 // 48
+data8 0x3FB3A9FEBC60540A // 49
+data8 0x3FB3FD0C10A3AA54 // 50
+data8 0x3FB46107D3540A82 // 51
+data8 0x3FB4C55DD16967FE // 52
+data8 0x3FB51940330C000B // 53
+data8 0x3FB56D620EE7115E // 54
+data8 0x3FB5D2ABCF26178E // 55
+data8 0x3FB6275AA5DEBF81 // 56
+data8 0x3FB68D4EAF26D7EE // 57
+data8 0x3FB6E28C5C54A28D // 58
+data8 0x3FB7380B9665B7C8 // 59
+data8 0x3FB78DCCC278E85B // 60
+data8 0x3FB7F50C2CF2557A // 61
+data8 0x3FB84B5FD5EAEFD8 // 62
+data8 0x3FB8A1F6BAB2B226 // 63
+data8 0x3FB8F8D144557BDF // 64
+data8 0x3FB94FEFDCD61D92 // 65
+data8 0x3FB9A752EF316149 // 66
+data8 0x3FB9FEFAE7611EE0 // 67
+data8 0x3FBA56E8325F5C87 // 68
+data8 0x3FBAAF1B3E297BB4 // 69
+data8 0x3FBB079479C372AD // 70
+data8 0x3FBB6054553B12F7 // 71
+data8 0x3FBBB95B41AB5CE6 // 72
+data8 0x3FBC12A9B13FE079 // 73
+data8 0x3FBC6C4017382BEA // 74
+data8 0x3FBCB41FBA42686D // 75
+data8 0x3FBD0E38CE73393F // 76
+data8 0x3FBD689B2193F133 // 77
+data8 0x3FBDC3472B1D2860 // 78
+data8 0x3FBE0C06300D528B // 79
+data8 0x3FBE6738190E394C // 80
+data8 0x3FBEC2B50D208D9B // 81
+data8 0x3FBF0C1C2B936828 // 82
+data8 0x3FBF68216C9CC727 // 83
+data8 0x3FBFB1F6381856F4 // 84
+data8 0x3FC00742AF4CE5F8 // 85
+data8 0x3FC02C64906512D2 // 86
+data8 0x3FC05AF1E63E03B4 // 87
+data8 0x3FC0804BEA723AA9 // 88
+data8 0x3FC0AF1FD6711527 // 89
+data8 0x3FC0D4B2A8805A00 // 90
+data8 0x3FC0FA5EF136A06C // 91
+data8 0x3FC1299A4FB3E306 // 92
+data8 0x3FC14F806253C3ED // 93
+data8 0x3FC175805D1587C1 // 94
+data8 0x3FC19B9A637CA295 // 95
+data8 0x3FC1CB5FC26EDE17 // 96
+data8 0x3FC1F1B4E65F2590 // 97
+data8 0x3FC218248B5DC3E5 // 98
+data8 0x3FC23EAED62ADC76 // 99
+data8 0x3FC26553EBD337BD // 100
+data8 0x3FC28C13F1B11900 // 101
+data8 0x3FC2BCAA14381386 // 102
+data8 0x3FC2E3A740B7800F // 103
+data8 0x3FC30ABFD8F333B6 // 104
+data8 0x3FC331F403985097 // 105
+data8 0x3FC35943E7A60690 // 106
+data8 0x3FC380AFAC6E7C07 // 107
+data8 0x3FC3A8377997B9E6 // 108
+data8 0x3FC3CFDB771C9ADB // 109
+data8 0x3FC3EDA90D39A5DF // 110
+data8 0x3FC4157EC09505CD // 111
+data8 0x3FC43D7113FB04C1 // 112
+data8 0x3FC4658030AD1CCF // 113
+data8 0x3FC48DAC404638F6 // 114
+data8 0x3FC4B5F56CBBB869 // 115
+data8 0x3FC4DE5BE05E7583 // 116
+data8 0x3FC4FCBC0776FD85 // 117
+data8 0x3FC525561E9256EE // 118
+data8 0x3FC54E0DF3198865 // 119
+data8 0x3FC56CAB7112BDE2 // 120
+data8 0x3FC59597BA735B15 // 121
+data8 0x3FC5BEA23A506FDA // 122
+data8 0x3FC5DD7E08DE382F // 123
+data8 0x3FC606BDD3F92355 // 124
+data8 0x3FC6301C518A501F // 125
+data8 0x3FC64F3770618916 // 126
+data8 0x3FC678CC14C1E2D8 // 127
+data8 0x3FC6981005ED2947 // 128
+data8 0x3FC6C1DB5F9BB336 // 129
+data8 0x3FC6E1488ECD2881 // 130
+data8 0x3FC70B4B2E7E41B9 // 131
+data8 0x3FC72AE209146BF9 // 132
+data8 0x3FC7551C81BD8DCF // 133
+data8 0x3FC774DD76CC43BE // 134
+data8 0x3FC79F505DB00E88 // 135
+data8 0x3FC7BF3BDE099F30 // 136
+data8 0x3FC7E9E7CAC437F9 // 137
+data8 0x3FC809FE4902D00D // 138
+data8 0x3FC82A2757995CBE // 139
+data8 0x3FC85525C625E098 // 140
+data8 0x3FC8757A79831887 // 141
+data8 0x3FC895E2058D8E03 // 142
+data8 0x3FC8C13437695532 // 143
+data8 0x3FC8E1C812EF32BE // 144
+data8 0x3FC9026F112197E8 // 145
+data8 0x3FC923294888880B // 146
+data8 0x3FC94EEA4B8334F3 // 147
+data8 0x3FC96FD1B639FC09 // 148
+data8 0x3FC990CCA66229AC // 149
+data8 0x3FC9B1DB33334843 // 150
+data8 0x3FC9D2FD740E6607 // 151
+data8 0x3FC9FF49EEDCB553 // 152
+data8 0x3FCA209A84FBCFF8 // 153
+data8 0x3FCA41FF1E43F02B // 154
+data8 0x3FCA6377D2CE9378 // 155
+data8 0x3FCA8504BAE0D9F6 // 156
+data8 0x3FCAA6A5EEEBEFE3 // 157
+data8 0x3FCAC85B878D7879 // 158
+data8 0x3FCAEA259D8FFA0B // 159
+data8 0x3FCB0C0449EB4B6B // 160
+data8 0x3FCB2DF7A5C50299 // 161
+data8 0x3FCB4FFFCA70E4D1 // 162
+data8 0x3FCB721CD17157E3 // 163
+data8 0x3FCB944ED477D4ED // 164
+data8 0x3FCBB695ED655C7D // 165
+data8 0x3FCBD8F2364AEC0F // 166
+data8 0x3FCBFB63C969F4FF // 167
+data8 0x3FCC1DEAC134D4E9 // 168
+data8 0x3FCC4087384F4F80 // 169
+data8 0x3FCC6339498F09E2 // 170
+data8 0x3FCC86010FFC076C // 171
+data8 0x3FCC9D3D065C5B42 // 172
+data8 0x3FCCC029375BA07A // 173
+data8 0x3FCCE32B66978BA4 // 174
+data8 0x3FCD0643AFD51404 // 175
+data8 0x3FCD29722F0DEA45 // 176
+data8 0x3FCD4CB70070FE44 // 177
+data8 0x3FCD6446AB3F8C96 // 178
+data8 0x3FCD87B0EF71DB45 // 179
+data8 0x3FCDAB31D1FE99A7 // 180
+data8 0x3FCDCEC96FDC888F // 181
+data8 0x3FCDE6908876357A // 182
+data8 0x3FCE0A4E4A25C200 // 183
+data8 0x3FCE2E2315755E33 // 184
+data8 0x3FCE461322D1648A // 185
+data8 0x3FCE6A0E95C7787B // 186
+data8 0x3FCE8E216243DD60 // 187
+data8 0x3FCEA63AF26E007C // 188
+data8 0x3FCECA74ED15E0B7 // 189
+data8 0x3FCEEEC692CCD25A // 190
+data8 0x3FCF070A36B8D9C1 // 191
+data8 0x3FCF2B8393E34A2D // 192
+data8 0x3FCF5014EF538A5B // 193
+data8 0x3FCF68833AF1B180 // 194
+data8 0x3FCF8D3CD9F3F04F // 195
+data8 0x3FCFA5C61ADD93E9 // 196
+data8 0x3FCFCAA8567EBA7A // 197
+data8 0x3FCFE34CC8743DD8 // 198
+data8 0x3FD0042BFD74F519 // 199
+data8 0x3FD016BDF6A18017 // 200
+data8 0x3FD023262F907322 // 201
+data8 0x3FD035CCED8D32A1 // 202
+data8 0x3FD042430E869FFC // 203
+data8 0x3FD04EBEC842B2E0 // 204
+data8 0x3FD06182E84FD4AC // 205
+data8 0x3FD06E0CB609D383 // 206
+data8 0x3FD080E60BEC8F12 // 207
+data8 0x3FD08D7E0D894735 // 208
+data8 0x3FD0A06CC96A2056 // 209
+data8 0x3FD0AD131F3B3C55 // 210
+data8 0x3FD0C01771E775FB // 211
+data8 0x3FD0CCCC3CAD6F4B // 212
+data8 0x3FD0D986D91A34A9 // 213
+data8 0x3FD0ECA9B8861A2D // 214
+data8 0x3FD0F972F87FF3D6 // 215
+data8 0x3FD106421CF0E5F7 // 216
+data8 0x3FD11983EBE28A9D // 217
+data8 0x3FD12661E35B785A // 218
+data8 0x3FD13345D2779D3B // 219
+data8 0x3FD146A6F597283A // 220
+data8 0x3FD15399E81EA83D // 221
+data8 0x3FD16092E5D3A9A6 // 222
+data8 0x3FD17413C3B7AB5E // 223
+data8 0x3FD1811BF629D6FB // 224
+data8 0x3FD18E2A47B46686 // 225
+data8 0x3FD19B3EBE1A4418 // 226
+data8 0x3FD1AEE9017CB450 // 227
+data8 0x3FD1BC0CED7134E2 // 228
+data8 0x3FD1C93712ABC7FF // 229
+data8 0x3FD1D66777147D3F // 230
+data8 0x3FD1EA3BD1286E1C // 231
+data8 0x3FD1F77BED932C4C // 232
+data8 0x3FD204C25E1B031F // 233
+data8 0x3FD2120F28CE69B1 // 234
+data8 0x3FD21F6253C48D01 // 235
+data8 0x3FD22CBBE51D60AA // 236
+data8 0x3FD240CE4C975444 // 237
+data8 0x3FD24E37F8ECDAE8 // 238
+data8 0x3FD25BA8215AF7FC // 239
+data8 0x3FD2691ECC29F042 // 240
+data8 0x3FD2769BFFAB2E00 // 241
+data8 0x3FD2841FC23952C9 // 242
+data8 0x3FD291AA1A384978 // 243
+data8 0x3FD29F3B0E15584B // 244
+data8 0x3FD2B3A0EE479DF7 // 245
+data8 0x3FD2C142842C09E6 // 246
+data8 0x3FD2CEEACCB7BD6D // 247
+data8 0x3FD2DC99CE82FF21 // 248
+data8 0x3FD2EA4F902FD7DA // 249
+data8 0x3FD2F80C186A25FD // 250
+data8 0x3FD305CF6DE7B0F7 // 251
+data8 0x3FD3139997683CE7 // 252
+data8 0x3FD3216A9BB59E7C // 253
+data8 0x3FD32F4281A3CEFF // 254
+data8 0x3FD33D2150110092 // 255
+LOCAL_OBJECT_END(log10f_data)
+
+
+// Code
+//==============================================================
+.section .text
+
+// logf has p13 true, p14 false
+// log10f has p14 true, p13 false
+
+GLOBAL_IEEE754_ENTRY(log10f)
+{ .mfi
+ getf.exp GR_Exp = f8 // sign+exponent of x; recomputed if x is unorm
+ frcpa.s1 FR_RcpX,p0 = f1,f8 // FR_RcpX = approximation of 1/x
+ mov GR_05 = 0xFFFE // biased exponent of A2=0.5
+}
+{ .mlx
+ addl GR_ad_T = @ltoff(log10f_data),gp // linkage table slot of log10f_data
+ movl GR_A3 = 0x3FD5555555555555 // double precision memory
+ // representation of A3
+};;
+{ .mfi
+ getf.sig GR_Sig = f8 // significand of x; recomputed if x is unorm
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ sub GR_025 = GR_05,r0,1 // biased exponent of A4=0.25
+}
+{ .mlx
+ ld8 GR_ad_T = [GR_ad_T] // GR_ad_T = address of log10f_data
+ movl GR_Ln2 = 0x3FD34413509F79FF // double precision memory
+ // representation of
+ // log(2)/ln(10)
+};;
+{ .mfi
+ setf.d FR_A3 = GR_A3 // create A3
+ fcmp.eq.s1 p14,p13 = f0,f0 // p14 <- 1, p13 <- 0: marks log10f
+ dep.z GR_xorg = GR_05,55,8 // 0x7F00000000000000: integer image
+ // of 255/256, laid out as
+ // GR_xorg[63] = low bit of biased
+ // exponent of 255/256
+ // GR_xorg[62-0] = bits 62 to 0
+ // of significand of 255/256
+}
+{ .mib
+ setf.exp FR_A2 = GR_05 // create A2
+ sub GR_de = GR_Exp,GR_05 // biased_exponent_of_x - 0xFFFE
+ // needed for comparison with 0.5 and 2.0
+ br.cond.sptk logf_log10f_common // join the code shared with logf
+};;
+GLOBAL_IEEE754_END(log10f)
+libm_alias_float_other (__log10, log10)
+
+GLOBAL_IEEE754_ENTRY(logf)
+{ .mfi
+ getf.exp GR_Exp = f8 // sign+exponent of x; recomputed if x is unorm
+ frcpa.s1 FR_RcpX,p0 = f1,f8 // FR_RcpX = approximation of 1/x
+ mov GR_05 = 0xFFFE // biased exponent of A2=0.5
+}
+{ .mlx
+ addl GR_ad_T = @ltoff(logf_data),gp // linkage table slot of logf_data
+ movl GR_A3 = 0x3FD5555555555555 // double precision memory
+ // representation of A3
+};;
+{ .mfi
+ getf.sig GR_Sig = f8 // significand of x; recomputed if x is unorm
+ fclass.m p8,p0 = f8,9 // is x positive unorm?
+ dep.z GR_xorg = GR_05,55,8 // 0x7F00000000000000: integer image
+ // of 255/256, laid out as
+ // GR_xorg[63] = low bit of biased
+ // exponent of 255/256
+ // GR_xorg[62-0] = bits 62 to 0
+ // of significand of 255/256
+}
+{ .mfi
+ ld8 GR_ad_T = [GR_ad_T] // GR_ad_T = address of logf_data
+ nop.f 0
+ sub GR_025 = GR_05,r0,1 // biased exponent of A4=0.25
+};;
+{ .mfi
+ setf.d FR_A3 = GR_A3 // create A3
+ fcmp.eq.s1 p13,p14 = f0,f0 // p13 <- 1, p14 <- 0: marks logf
+ sub GR_de = GR_Exp,GR_05 // biased_exponent_of_x - 0xFFFE
+ // needed for comparison with 0.5 and 2.0
+}
+{ .mlx
+ setf.exp FR_A2 = GR_05 // create A2
+ movl GR_Ln2 = 0x3FE62E42FEFA39EF // double precision memory
+ // representation of log(2)
+};;
+logf_log10f_common:
+{ .mfi
+ setf.exp FR_A4 = GR_025 // create A4=0.25
+ fclass.m p9,p0 = f8,0x3A // is x < 0 (negative normal, unnormal or -Inf)?
+ dep GR_x = GR_Exp,GR_Sig,63,1 // build the integer image of x:
+ // GR_x[63] = GR_Exp[0]
+ // GR_x[62-0] = GR_Sig[62-0]
+}
+{ .mib
+ sub GR_N = GR_Exp,GR_05,1 // unbiased exponent of x
+ cmp.gtu p6,p7 = 2,GR_de // is 0.5 <= x < 2.0?
+(p8) br.cond.spnt logf_positive_unorm // positive unorm: normalize first
+};;
+logf_core:
+{ .mfi
+ setf.sig FR_N = GR_N // copy unbiased exponent of x to the
+ // significand field of FR_N
+ fclass.m p10,p0 = f8,0x1E1 // is x NaN, NaT or +Inf?
+ dep.z GR_dx = GR_05,54,3 // 0x0180000000000000 - difference
+ // between the integer images
+ // of 257/256 and 255/256
+}
+{ .mfi
+ nop.m 0
+ nop.f 0
+ sub GR_x = GR_x,GR_xorg // difference between the integer images
+ // of x and 255/256
+};;
+{ .mfi
+ ldfd FR_InvLn10 = [GR_ad_T],8 // first table entry, then step to T[0]
+ fcmp.eq.s1 p11,p0 = f8,f1 // is x equal to 1.0?
+ extr.u GR_Ind = GR_Sig,55,8 // get bits from 55 to 62 as index
+}
+{ .mib
+ setf.d FR_Ln2 = GR_Ln2 // create log(2) or log10(2)
+(p6) cmp.gtu p6,p7 = GR_dx,GR_x // set p6 if 255/256 <= x < 257/256
+(p9) br.cond.spnt logf_negatives // jump if input argument is a negative number
+};;
+// p6 is true if |x-1| < 1/256
+// p7 is true if |x-1| >= 1/256
+.pred.rel "mutex",p6,p7
+{ .mfi
+ shladd GR_ad_T = GR_Ind,3,GR_ad_T // address of T[GR_Ind] (8 bytes each)
+(p7) fms.s1 FR_r = FR_RcpX,f8,f1 // range reduction for |x-1|>=1/256
+ extr.u GR_Exp = GR_Exp,0,17 // exponent without sign
+}
+{ .mfb
+ nop.m 0
+(p6) fms.s1 FR_r = f8,f1,f1 // range reduction for |x-1|<1/256
+(p10) br.cond.spnt logf_nan_nat_pinf // exit for NaN, NaT or +Inf
+};;
+{ .mfb
+ ldfd FR_T = [GR_ad_T] // load T
+(p11) fma.s.s0 f8 = f0,f0,f0 // result is +0 for x = 1.0
+(p11) br.ret.spnt b0 // exit for x = 1.0
+};;
+{ .mib
+ nop.m 0
+ cmp.eq p12,p0 = r0,GR_Exp // is x +/-0? (comparing the exponent
+ // with 0 is enough here
+ // because all unnormals have
+ // already been filtered)
+(p12) br.cond.spnt logf_zeroes // Branch if input argument is +/-0
+};;
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_A2 = FR_A2,FR_r,f1 // 1-A2*r
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_r2 = FR_r,FR_r,f0 // r^2
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fcvt.xf FR_N = FR_N // convert integer N in significand of FR_N
+ // to floating-point representation
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 FR_A3 = FR_A4,FR_r,FR_A3 // A3-A4*r
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_r = FR_r,FR_InvLn10,f0 // r/ln(10) for log10f, r*1.0 for logf
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ nop.f 0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ fma.s1 FR_A2 = FR_A3,FR_r2,FR_A2 // (A3-A4*r)*r^2+(1-A2*r)
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 FR_NxLn2pT = FR_N,FR_Ln2,FR_T // N*Ln2+T
+ nop.i 0
+};;
+.pred.rel "mutex",p6,p7
+{ .mfi
+ nop.m 0
+(p7) fma.s.s0 f8 = FR_A2,FR_r,FR_NxLn2pT // result for |x-1|>=1/256
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p6) fma.s.s0 f8 = FR_A2,FR_r,f0 // result for |x-1|<1/256
+ br.ret.sptk b0 // return the result in f8
+};;
+
+.align 32
+logf_positive_unorm:
+{ .mfi
+ nop.m 0
+(p8) fma.s0 f8 = f8,f1,f0 // Normalize & set D-flag
+ nop.i 0
+};;
+{ .mfi
+ getf.exp GR_Exp = f8 // recompute biased exponent
+ nop.f 0
+ cmp.ne p6,p7 = r0,r0 // p6 <- 0, p7 <- 1 because
+ // an unorm input lies outside
+ // the interval [255/256; 257/256]
+};;
+{ .mfi
+ getf.sig GR_Sig = f8 // recompute significand
+ nop.f 0
+ nop.i 0
+};;
+{ .mib
+ sub GR_N = GR_Exp,GR_05,1 // unbiased exponent N
+ nop.i 0
+ br.cond.sptk logf_core // return into main path
+};;
+
+.align 32
+logf_nan_nat_pinf:
+{ .mfi
+ nop.m 0
+ fma.s.s0 f8 = f8,f1,f0 // f8 = x*1+0; set V-flag
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ nop.f 0
+ br.ret.sptk b0 // exit for NaN, NaT or +Inf
+};;
+
+.align 32
+logf_zeroes:
+{ .mfi
+ nop.m 0
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
+}
+{ .mfi
+(p13) mov GR_TAG = 4 // libm error tag for logf(+/-0)
+ fms.s1 FR_tmp = f0,f0,f1 // -1.0
+ nop.i 0
+};;
+{ .mfi
+ nop.m 0
+ frcpa.s0 f8,p0 = FR_tmp,f0 // log(+/-0) should be equal to -INF.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of FR_tmp/f0.
+ // Since FR_tmp is -1 the result is -INF
+ nop.i 0
+}
+{ .mib
+(p14) mov GR_TAG = 10 // libm error tag for log10f(+/-0)
+ nop.i 0
+ br.cond.sptk logf_libm_err // go report the error
+};;
+
+.align 32
+logf_negatives:
+{ .mfi
+(p13) mov GR_TAG = 5 // libm error tag for logf(negative)
+ fmerge.s FR_X = f8,f8 // keep input argument for subsequent
+ // call of __libm_error_support#
+ nop.i 0
+};;
+{ .mfi
+(p14) mov GR_TAG = 11 // libm error tag for log10f(negative)
+ frcpa.s0 f8,p0 = f0,f0 // log of a negative argument must be NaN.
+ // We can get it using frcpa because it
+ // sets result to the IEEE-754 mandated
+ // quotient of f0/f0 i.e. NaN.
+ nop.i 0
+};;
+
+.align 32
+logf_libm_err:
+{ .mmi
+ alloc r32 = ar.pfs,1,4,4,0 // frame: 1 input, 4 locals, 4 outputs (r32 -> r40)
+ mov GR_Parameter_TAG = GR_TAG // pass the error tag in the last output register
+ nop.i 0 // NOTE(review): no branch here, presumably continues into the error region below
+};;
+GLOBAL_IEEE754_END(logf)
+libm_alias_float_other (__log, log)
+#ifdef SHARED
+.symver logf,logf@@GLIBC_2.27
+.weak __logf_compat
+.set __logf_compat,__logf
+.symver __logf_compat,logf@GLIBC_2.2
+#endif
+
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 on stack, advance by 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // back to the Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // address of the result slot (sp + 48)
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_logf_data.c b/sysdeps/ia64/fpu/e_logf_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logf_data.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_logl.S b/sysdeps/ia64/fpu/e_logl.S
new file mode 100644
index 0000000000..004aee9d63
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_logl.S
@@ -0,0 +1,1201 @@
+.file "logl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// History:
+// 05/21/01 Extracted logl and log10l from log1pl.s file, and optimized
+// all paths.
+// 06/20/01 Fixed error tag for x=-inf.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
+//
+//*********************************************************************
+//
+//*********************************************************************
+//
+// Function: Combined logl(x) and log10l(x) where
+// logl(x) = ln(x), for double-extended precision x values
+// log10l(x) = log10(x), for double-extended precision x values
+// (log10 denotes the base-10 logarithm)
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f34-f76
+//
+// General Purpose Registers:
+// r32-r56
+// r53-r56 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6-p14
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions cannot occur
+// Underflow exceptions raised when appropriate for log1p
+// (Error Handling Routine called for underflow)
+// Inexact raised when appropriate by algorithm
+//
+// logl(inf) = inf
+// logl(-inf) = QNaN
+// logl(+/-0) = -inf
+// logl(SNaN) = QNaN
+// logl(QNaN) = QNaN
+// logl(EM_special Values) = QNaN
+// log10l(inf) = inf
+// log10l(-inf) = QNaN
+// log10l(+/-0) = -inf
+// log10l(SNaN) = QNaN
+// log10l(QNaN) = QNaN
+// log10l(EM_special Values) = QNaN
+//
+//*********************************************************************
+//
+// Overview
+//
+// The method consists of two cases.
+//
+// If |X-1| < 2^(-7) use case log_near1;
+// else use case log_regular;
+//
+// Case log_near1:
+//
+// logl( X ) = logl( 1 + W ) can be approximated by a simple polynomial
+// in W = X-1. This polynomial resembles the truncated Taylor
+// series W - W^2/2 + W^3/3 - ...
+//
+// Case log_regular:
+//
+// Here we use a table lookup method. The basic idea is that in
+// order to compute logl(Arg) for an argument Arg in [1,2), we
+// construct a value G such that G*Arg is close to 1 and that
+// logl(1/G) is obtainable easily from a table of values calculated
+// beforehand. Thus
+//
+// logl(Arg) = logl(1/G) + logl(G*Arg)
+// = logl(1/G) + logl(1 + (G*Arg - 1))
+//
+// Because |G*Arg - 1| is small, the second term on the right hand
+// side can be approximated by a short polynomial. We elaborate
+// this method in four steps.
+//
+// Step 0: Initialization
+//
+// We need to calculate logl( X ). Obtain N, S_hi such that
+//
+// X = 2^N * S_hi exactly
+//
+// where S_hi in [1,2)
+//
+// Step 1: Argument Reduction
+//
+// Based on S_hi, obtain G_1, G_2, G_3 from a table and calculate
+//
+// G := G_1 * G_2 * G_3
+// r := (G * S_hi - 1)
+//
+// These G_j's have the property that the product is exactly
+// representable and that |r| < 2^(-12) as a result.
+//
+// Step 2: Approximation
+//
+//
+// logl(1 + r) is approximated by a short polynomial poly(r).
+//
+// Step 3: Reconstruction
+//
+//
+// Finally, logl( X ) is given by
+//
+// logl( X ) = logl( 2^N * S_hi )
+// ~=~ N*logl(2) + logl(1/G) + logl(1 + r)
+// ~=~ N*logl(2) + logl(1/G) + poly(r).
+//
+// **** Algorithm ****
+//
+// Case log_near1:
+//
+// Here we compute a simple polynomial. To exploit parallelism, we split
+// the polynomial into two portions.
+//
+// W := X - 1
+// Wsq := W * W
+// W4 := Wsq*Wsq
+// W6 := W4*Wsq
+// Y_hi := W + Wsq*(P_1 + W*(P_2 + W*(P_3 + W*P_4)))
+// Y_lo := W6*(P_5 + W*(P_6 + W*(P_7 + W*P_8)))
+//
+// Case log_regular:
+//
+// We present the algorithm in four steps.
+//
+// Step 0. Initialization
+// ----------------------
+//
+// Z := X
+// N := unbiased exponent of Z
+// S_hi := 2^(-N) * Z
+//
+// Step 1. Argument Reduction
+// --------------------------
+//
+// Let
+//
+// Z = 2^N * S_hi = 2^N * 1.d_1 d_2 d_3 ... d_63
+//
+// We obtain G_1, G_2, G_3 by the following steps.
+//
+//
+// Define X_0 := 1.d_1 d_2 ... d_14. This is extracted
+// from S_hi.
+//
+// Define A_1 := 1.d_1 d_2 d_3 d_4. This is X_0 truncated
+// to lsb = 2^(-4).
+//
+// Define index_1 := [ d_1 d_2 d_3 d_4 ].
+//
+// Fetch Z_1 := (1/A_1) rounded UP in fixed point with
+// fixed point lsb = 2^(-15).
+// Z_1 looks like z_0.z_1 z_2 ... z_15
+// Note that the fetching is done using index_1.
+// A_1 is actually not needed in the implementation
+// and is used here only to explain how the value
+// Z_1 is defined.
+//
+// Fetch G_1 := (1/A_1) truncated to 21 sig. bits.
+// floating pt. Again, fetching is done using index_1. A_1
+// explains how G_1 is defined.
+//
+// Calculate X_1 := X_0 * Z_1 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 d_5 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_1 indeed always begins
+// with 1.0000 in fixed point.
+//
+//
+// Define A_2 := 1.0 0 0 0 d_5 d_6 d_7 d_8. This is X_1
+// truncated to lsb = 2^(-8). Similar to A_1,
+// A_2 is not needed in actual implementation. It
+// helps explain how some of the values are defined.
+//
+// Define index_2 := [ d_5 d_6 d_7 d_8 ].
+//
+// Fetch Z_2 := (1/A_2) rounded UP in fixed point with
+// fixed point lsb = 2^(-15). Fetch done using index_2.
+// Z_2 looks like z_0.z_1 z_2 ... z_15
+//
+// Fetch G_2 := (1/A_2) truncated to 21 sig. bits.
+// floating pt.
+//
+// Calculate X_2 := X_1 * Z_2 truncated to lsb = 2^(-14)
+// = 1.0 0 0 0 0 0 0 0 d_9 d_10 ... d_14
+// This is accomplished by integer multiplication.
+// It is proved that X_2 indeed always begins
+// with 1.00000000 in fixed point.
+//
+//
+// Define A_3 := 1.0 0 0 0 0 0 0 0 d_9 d_10 d_11 d_12 d_13 1.
+// This is 2^(-14) + X_2 truncated to lsb = 2^(-13).
+//
+// Define index_3 := [ d_9 d_10 d_11 d_12 d_13 ].
+//
+// Fetch G_3 := (1/A_3) truncated to 21 sig. bits.
+// floating pt. Fetch is done using index_3.
+//
+// Compute G := G_1 * G_2 * G_3.
+//
+// This is done exactly since each of G_j only has 21 sig. bits.
+//
+// Compute
+//
+// r := (G*S_hi - 1)
+//
+//
+// Step 2. Approximation
+// ---------------------
+//
+// This step computes an approximation to logl( 1 + r ) where r is the
+// reduced argument just obtained. It is proved that |r| <= 1.9*2^(-13);
+// thus logl(1+r) can be approximated by a short polynomial:
+//
+// logl(1+r) ~=~ poly = r + Q1 r^2 + ... + Q4 r^5
+//
+//
+// Step 3. Reconstruction
+// ----------------------
+//
+// This step computes the desired result of logl(X):
+//
+// logl(X) = logl( 2^N * S_hi )
+// = N*logl(2) + logl( S_hi )
+// = N*logl(2) + logl(1/G) +
+// logl(1 + G*S_hi - 1 )
+//
+// logl(2), logl(1/G_j) are stored as pairs of (single,double) numbers:
+// log2_hi, log2_lo, log1byGj_hi, log1byGj_lo. The high parts are
+// single-precision numbers and the low parts are double precision
+// numbers. These have the property that
+//
+// N*log2_hi + SUM ( log1byGj_hi )
+//
+// is computable exactly in double-extended precision (64 sig. bits).
+// Finally
+//
+// Y_hi := N*log2_hi + SUM ( log1byGj_hi )
+// Y_lo := poly_hi + [ poly_lo +
+// ( SUM ( log1byGj_lo ) + N*log2_lo ) ]
+//
+
+RODATA
+.align 64
+
+// ************* DO NOT CHANGE THE ORDER OF THESE TABLES *************
+
+// P_8, P_7, P_6, P_5, P_4, P_3, P_2, and P_1
+
+LOCAL_OBJECT_START(Constants_P) // near1-path coefficients, 16 bytes each, loaded with ldfe
+data8 0xE3936754EFD62B15,0x00003FFB // P_8
+data8 0x8003B271A5E56381,0x0000BFFC // P_7
+data8 0x9249248C73282DB0,0x00003FFC // P_6
+data8 0xAAAAAA9F47305052,0x0000BFFC // P_5
+data8 0xCCCCCCCCCCD17FC9,0x00003FFC // P_4
+data8 0x8000000000067ED5,0x0000BFFD // P_3
+data8 0xAAAAAAAAAAAAAAAA,0x00003FFD // P_2
+data8 0xFFFFFFFFFFFFFFFE,0x0000BFFD // P_1
+LOCAL_OBJECT_END(Constants_P)
+
+// log2_hi, log2_lo, Q_4, Q_3, Q_2, and Q_1
+
+LOCAL_OBJECT_START(Constants_Q) // regular-path constants, read in this order via GR_ad_q
+data8 0xB172180000000000,0x00003FFE // log2_hi
+data8 0x82E308654361C4C6,0x0000BFE2 // log2_lo
+data8 0xCCCCCAF2328833CB,0x00003FFC // Q_4
+data8 0x80000077A9D4BAFB,0x0000BFFD // Q_3
+data8 0xAAAAAAAAAAABE3D2,0x00003FFD // Q_2
+data8 0xFFFFFFFFFFFFDAB7,0x0000BFFD // Q_1
+LOCAL_OBJECT_END(Constants_Q)
+
+// 1/ln10_hi, 1/ln10_lo
+
+LOCAL_OBJECT_START(Constants_1_by_LN10) // loaded only on the log10l path (p14)
+data8 0xDE5BD8A937287195,0x00003FFD // 1/ln10_hi
+data8 0xD56EAABEACCF70C8,0x00003FBB // 1/ln10_lo
+LOCAL_OBJECT_END(Constants_1_by_LN10)
+
+
+// Z1 - 16 bit fixed
+
+LOCAL_OBJECT_START(Constants_Z_1) // 16 entries x 4 bytes, selected by index_1 (GR_Index1)
+data4 0x00008000
+data4 0x00007879
+data4 0x000071C8
+data4 0x00006BCB
+data4 0x00006667
+data4 0x00006187
+data4 0x00005D18
+data4 0x0000590C
+data4 0x00005556
+data4 0x000051EC
+data4 0x00004EC5
+data4 0x00004BDB
+data4 0x00004925
+data4 0x0000469F
+data4 0x00004445
+data4 0x00004211
+LOCAL_OBJECT_END(Constants_Z_1)
+
+// G1 and H1 - IEEE single and h1 - IEEE double
+
+LOCAL_OBJECT_START(Constants_G_H_h1) // 16 entries x 16 bytes (G_1, H_1, h_1), selected by index_1
+data4 0x3F800000,0x00000000
+data8 0x0000000000000000
+data4 0x3F70F0F0,0x3D785196
+data8 0x3DA163A6617D741C
+data4 0x3F638E38,0x3DF13843
+data8 0x3E2C55E6CBD3D5BB
+data4 0x3F579430,0x3E2FF9A0
+data8 0xBE3EB0BFD86EA5E7
+data4 0x3F4CCCC8,0x3E647FD6
+data8 0x3E2E6A8C86B12760
+data4 0x3F430C30,0x3E8B3AE7
+data8 0x3E47574C5C0739BA
+data4 0x3F3A2E88,0x3EA30C68
+data8 0x3E20E30F13E8AF2F
+data4 0x3F321640,0x3EB9CEC8
+data8 0xBE42885BF2C630BD
+data4 0x3F2AAAA8,0x3ECF9927
+data8 0x3E497F3497E577C6
+data4 0x3F23D708,0x3EE47FC5
+data8 0x3E3E6A6EA6B0A5AB
+data4 0x3F1D89D8,0x3EF8947D
+data8 0xBDF43E3CD328D9BE
+data4 0x3F17B420,0x3F05F3A1
+data8 0x3E4094C30ADB090A
+data4 0x3F124920,0x3F0F4303
+data8 0xBE28FBB2FC1FE510
+data4 0x3F0D3DC8,0x3F183EBF
+data8 0x3E3A789510FDE3FA
+data4 0x3F088888,0x3F20EC80
+data8 0x3E508CE57CC8C98F
+data4 0x3F042108,0x3F29516A
+data8 0xBE534874A223106C
+LOCAL_OBJECT_END(Constants_G_H_h1)
+
+// Z2 - 16 bit fixed
+
+LOCAL_OBJECT_START(Constants_Z_2) // 16 entries x 4 bytes, selected by index_2 (GR_Index2)
+data4 0x00008000
+data4 0x00007F81
+data4 0x00007F02
+data4 0x00007E85
+data4 0x00007E08
+data4 0x00007D8D
+data4 0x00007D12
+data4 0x00007C98
+data4 0x00007C20
+data4 0x00007BA8
+data4 0x00007B31
+data4 0x00007ABB
+data4 0x00007A45
+data4 0x000079D1
+data4 0x0000795D
+data4 0x000078EB
+LOCAL_OBJECT_END(Constants_Z_2)
+
+// G2 and H2 - IEEE single and h2 - IEEE double
+
+LOCAL_OBJECT_START(Constants_G_H_h2) // 16 entries x 16 bytes (G_2, H_2, h_2), selected by index_2
+data4 0x3F800000,0x00000000
+data8 0x0000000000000000
+data4 0x3F7F00F8,0x3B7F875D
+data8 0x3DB5A11622C42273
+data4 0x3F7E03F8,0x3BFF015B
+data8 0x3DE620CF21F86ED3
+data4 0x3F7D08E0,0x3C3EE393
+data8 0xBDAFA07E484F34ED
+data4 0x3F7C0FC0,0x3C7E0586
+data8 0xBDFE07F03860BCF6
+data4 0x3F7B1880,0x3C9E75D2
+data8 0x3DEA370FA78093D6
+data4 0x3F7A2328,0x3CBDC97A
+data8 0x3DFF579172A753D0
+data4 0x3F792FB0,0x3CDCFE47
+data8 0x3DFEBE6CA7EF896B
+data4 0x3F783E08,0x3CFC15D0
+data8 0x3E0CF156409ECB43
+data4 0x3F774E38,0x3D0D874D
+data8 0xBE0B6F97FFEF71DF
+data4 0x3F766038,0x3D1CF49B
+data8 0xBE0804835D59EEE8
+data4 0x3F757400,0x3D2C531D
+data8 0x3E1F91E9A9192A74
+data4 0x3F748988,0x3D3BA322
+data8 0xBE139A06BF72A8CD
+data4 0x3F73A0D0,0x3D4AE46F
+data8 0x3E1D9202F8FBA6CF
+data4 0x3F72B9D0,0x3D5A1756
+data8 0xBE1DCCC4BA796223
+data4 0x3F71D488,0x3D693B9D
+data8 0xBE049391B6B7C239
+LOCAL_OBJECT_END(Constants_G_H_h2)
+
+// G3 and H3 - IEEE single and h3 - IEEE double
+
+LOCAL_OBJECT_START(Constants_G_H_h3) // 32 entries x 16 bytes (G_3, H_3, h_3), selected by index_3
+data4 0x3F7FFC00,0x38800100
+data8 0x3D355595562224CD
+data4 0x3F7FF400,0x39400480
+data8 0x3D8200A206136FF6
+data4 0x3F7FEC00,0x39A00640
+data8 0x3DA4D68DE8DE9AF0
+data4 0x3F7FE400,0x39E00C41
+data8 0xBD8B4291B10238DC
+data4 0x3F7FDC00,0x3A100A21
+data8 0xBD89CCB83B1952CA
+data4 0x3F7FD400,0x3A300F22
+data8 0xBDB107071DC46826
+data4 0x3F7FCC08,0x3A4FF51C
+data8 0x3DB6FCB9F43307DB
+data4 0x3F7FC408,0x3A6FFC1D
+data8 0xBD9B7C4762DC7872
+data4 0x3F7FBC10,0x3A87F20B
+data8 0xBDC3725E3F89154A
+data4 0x3F7FB410,0x3A97F68B
+data8 0xBD93519D62B9D392
+data4 0x3F7FAC18,0x3AA7EB86
+data8 0x3DC184410F21BD9D
+data4 0x3F7FA420,0x3AB7E101
+data8 0xBDA64B952245E0A6
+data4 0x3F7F9C20,0x3AC7E701
+data8 0x3DB4B0ECAABB34B8
+data4 0x3F7F9428,0x3AD7DD7B
+data8 0x3D9923376DC40A7E
+data4 0x3F7F8C30,0x3AE7D474
+data8 0x3DC6E17B4F2083D3
+data4 0x3F7F8438,0x3AF7CBED
+data8 0x3DAE314B811D4394
+data4 0x3F7F7C40,0x3B03E1F3
+data8 0xBDD46F21B08F2DB1
+data4 0x3F7F7448,0x3B0BDE2F
+data8 0xBDDC30A46D34522B
+data4 0x3F7F6C50,0x3B13DAAA
+data8 0x3DCB0070B1F473DB
+data4 0x3F7F6458,0x3B1BD766
+data8 0xBDD65DDC6AD282FD
+data4 0x3F7F5C68,0x3B23CC5C
+data8 0xBDCDAB83F153761A
+data4 0x3F7F5470,0x3B2BC997
+data8 0xBDDADA40341D0F8F
+data4 0x3F7F4C78,0x3B33C711
+data8 0x3DCD1BD7EBC394E8
+data4 0x3F7F4488,0x3B3BBCC6
+data8 0xBDC3532B52E3E695
+data4 0x3F7F3C90,0x3B43BAC0
+data8 0xBDA3961EE846B3DE
+data4 0x3F7F34A0,0x3B4BB0F4
+data8 0xBDDADF06785778D4
+data4 0x3F7F2CA8,0x3B53AF6D
+data8 0x3DCC3ED1E55CE212
+data4 0x3F7F24B8,0x3B5BA620
+data8 0xBDBA31039E382C15
+data4 0x3F7F1CC8,0x3B639D12
+data8 0x3D635A0B5C5AF197
+data4 0x3F7F14D8,0x3B6B9444
+data8 0xBDDCCB1971D34EFC
+data4 0x3F7F0CE0,0x3B7393BC
+data8 0x3DC7450252CD7ADA
+data4 0x3F7F04F0,0x3B7B8B6D
+data8 0xBDB68F177D7F2A42
+LOCAL_OBJECT_END(Constants_G_H_h3)
+
+
+// Floating Point Registers (symbolic names; several names share one register)
+
+FR_Input_X = f8 // Argument x; f8 also receives the result
+
+FR_Y_hi = f34
+FR_Y_lo = f35
+
+FR_Scale = f36
+FR_X_Prime = f37 // Normalized copy of x (fnorm.s1)
+FR_S_hi = f38
+FR_W = f39 // W = x - 1
+FR_G = f40
+
+FR_H = f41
+FR_wsq = f42
+FR_w4 = f43
+FR_h = f44
+FR_w6 = f45
+
+FR_G2 = f46
+FR_H2 = f47
+FR_poly_lo = f48
+FR_P8 = f49
+FR_poly_hi = f50
+
+FR_P7 = f51
+FR_h2 = f52
+FR_rsq = f53
+FR_P6 = f54
+FR_r = f55
+
+FR_log2_hi = f56
+FR_log2_lo = f57
+FR_p87 = f58 // p87, p876 and p8765 are successive Horner steps in f58
+FR_p876 = f58
+FR_p8765 = f58
+FR_float_N = f59
+FR_Q4 = f60
+
+FR_p43 = f61 // p43, p432 and p4321 are successive Horner steps in f61
+FR_p432 = f61
+FR_p4321 = f61
+FR_P4 = f62
+FR_G3 = f63
+FR_H3 = f64
+FR_h3 = f65
+
+FR_Q3 = f66
+FR_P3 = f67
+FR_Q2 = f68
+FR_P2 = f69
+FR_1LN10_hi = f70
+
+FR_Q1 = f71
+FR_P1 = f72
+FR_1LN10_lo = f73
+FR_P5 = f74
+FR_rcub = f75
+
+FR_Output_X_tmp = f76
+
+FR_X = f8 // Error stub parameter 1 (same register as FR_Input_X)
+FR_Y = f0 // Error stub parameter 2 (f0, compared against as zero below)
+FR_RESULT = f76 // Error stub parameter 3 (same register as FR_Output_X_tmp)
+
+
+// General Purpose Registers (r32-r52 locals, r53-r56 outputs per the alloc)
+
+GR_ad_p = r33
+GR_Index1 = r34
+GR_Index2 = r35
+GR_signif = r36
+GR_X_0 = r37
+GR_X_1 = r38
+GR_X_2 = r39
+GR_Z_1 = r40
+GR_Z_2 = r41
+GR_N = r42
+GR_Bias = r43
+GR_M = r44
+GR_Index3 = r45
+GR_ad_p2 = r46
+GR_exp_mask = r47
+GR_exp_2tom7 = r48
+GR_ad_ln10 = r49
+GR_ad_tbl_1 = r50
+GR_ad_tbl_2 = r51
+GR_ad_tbl_3 = r52
+GR_ad_q = r53
+GR_ad_z_1 = r54
+GR_ad_z_2 = r55
+GR_ad_z_3 = r56
+
+//
+// Error-path registers (added for unwind support); r50-r56 are shared
+// with the table pointers above, which the error path does not read.
+
+GR_SAVE_PFS = r50
+GR_SAVE_B0 = r51
+GR_SAVE_GP = r52
+GR_Parameter_X = r53
+GR_Parameter_Y = r54
+GR_Parameter_RESULT = r55
+GR_Parameter_TAG = r56
+
+.section .text
+
+GLOBAL_IEEE754_ENTRY(logl)
+{ .mfi
+ alloc r32 = ar.pfs,0,21,4,0 // 21 locals (r32-r52), 4 outputs (r53-r56)
+ fclass.m p6, p0 = FR_Input_X, 0x1E3 // Test for natval, nan, inf
+ cmp.eq p7, p14 = r0, r0 // Set p7 if logl
+}
+{ .mfb
+ addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp // Linkage-table slot for Constants_Z_1
+ fnorm.s1 FR_X_Prime = FR_Input_X // Normalize x
+ br.cond.sptk LOGL_BEGIN // Join the code shared with log10l
+}
+;;
+
+GLOBAL_IEEE754_END(logl)
+libm_alias_ldouble_other (__log, log)
+
+
+GLOBAL_IEEE754_ENTRY(log10l)
+{ .mfi
+ alloc r32 = ar.pfs,0,21,4,0 // 21 locals (r32-r52), 4 outputs (r53-r56)
+ fclass.m p6, p0 = FR_Input_X, 0x1E3 // Test for natval, nan, inf
+ cmp.ne p7, p14 = r0, r0 // Set p14 if log10l
+}
+{ .mfb
+ addl GR_ad_z_1 = @ltoff(Constants_Z_1#),gp // Linkage-table slot for Constants_Z_1
+ fnorm.s1 FR_X_Prime = FR_Input_X // Normalize x
+ nop.b 999 // No branch: fall through into LOGL_BEGIN
+}
+;;
+
+
+// Common code for logl and log10l (p7 = logl, p14 = log10l)
+LOGL_BEGIN:
+{ .mfi
+ ld8 GR_ad_z_1 = [GR_ad_z_1] // Get pointer to Constants_Z_1
+ fclass.m p10, p0 = FR_Input_X, 0x0b // Test for denormal
+ mov GR_exp_2tom7 = 0x0fff8 // Exponent of 2^-7
+}
+;;
+
+{ .mfb
+ getf.sig GR_signif = FR_Input_X // Get significand of x
+ fcmp.eq.s1 p9, p0 = FR_Input_X, f1 // Test for x=1.0
+(p6) br.cond.spnt LOGL_64_special // Branch for nan, inf, natval
+}
+;;
+
+{ .mfi
+ add GR_ad_tbl_1 = 0x040, GR_ad_z_1 // Point to Constants_G_H_h1
+ fcmp.lt.s1 p13, p0 = FR_Input_X, f0 // Test for x<0
+ add GR_ad_p = -0x100, GR_ad_z_1 // Point to Constants_P
+}
+{ .mib
+ add GR_ad_z_2 = 0x140, GR_ad_z_1 // Point to Constants_Z_2
+ add GR_ad_tbl_2 = 0x180, GR_ad_z_1 // Point to Constants_G_H_h2
+(p10) br.cond.spnt LOGL_64_denormal // Branch for denormal
+}
+;;
+
+LOGL_64_COMMON:
+{ .mfi
+ add GR_ad_q = 0x080, GR_ad_p // Point to Constants_Q
+ fcmp.eq.s1 p8, p0 = FR_Input_X, f0 // Test for x=0
+ extr.u GR_Index1 = GR_signif, 59, 4 // index_1 = signif bits 59-62
+}
+{ .mfb
+ add GR_ad_tbl_3 = 0x280, GR_ad_z_1 // Point to Constants_G_H_h3
+(p9) fma.s0 f8 = FR_Input_X, f0, f0 // If x=1, return +0.0
+(p9) br.ret.spnt b0 // Exit if x=1
+}
+;;
+
+{ .mfi
+ shladd GR_ad_z_1 = GR_Index1, 2, GR_ad_z_1 // Point to Z_1
+ fclass.nm p10, p0 = FR_Input_X, 0x1FF // Test for unsupported
+ extr.u GR_X_0 = GR_signif, 49, 15 // Get high 15 bits of significand
+}
+{ .mfi
+ ldfe FR_P8 = [GR_ad_p],16 // Load P_8 for near1 path
+ fsub.s1 FR_W = FR_X_Prime, f1 // W = x - 1
+ add GR_ad_ln10 = 0x060, GR_ad_q // Point to Constants_1_by_LN10
+}
+;;
+
+{ .mfi
+ ld4 GR_Z_1 = [GR_ad_z_1] // Load Z_1
+ nop.f 999
+ mov GR_exp_mask = 0x1FFFF // Create exponent mask
+}
+{ .mib
+ shladd GR_ad_tbl_1 = GR_Index1, 4, GR_ad_tbl_1 // Point to G_1
+ mov GR_Bias = 0x0FFFF // Create exponent bias
+(p13) br.cond.spnt LOGL_64_negative // Branch if x<0
+}
+;;
+
+{ .mfb
+ ldfps FR_G, FR_H = [GR_ad_tbl_1],8 // Load G_1, H_1
+ fmerge.se FR_S_hi = f1,FR_X_Prime // Form |x|
+(p8) br.cond.spnt LOGL_64_zero // Branch if x=0
+}
+;;
+
+{ .mmb
+ getf.exp GR_N = FR_X_Prime // Get N = exponent of x
+ ldfd FR_h = [GR_ad_tbl_1] // Load h_1
+(p10) br.cond.spnt LOGL_64_unsupported // Branch for unsupported type
+}
+;;
+
+{ .mfi
+ ldfe FR_log2_hi = [GR_ad_q],16 // Load log2_hi
+ fcmp.eq.s0 p8, p0 = FR_Input_X, f0 // Dummy op to flag denormals
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // Get bits 30-15 of X_0 * Z_1
+}
+;;
+
+//
+// For performance, don't use result of pmpyshr2.u for 4 cycles.
+//
+{ .mmi
+ ldfe FR_log2_lo = [GR_ad_q],16 // Load log2_lo
+(p14) ldfe FR_1LN10_hi = [GR_ad_ln10],16 // If log10l, load 1/ln10_hi
+ sub GR_N = GR_N, GR_Bias // Remove bias: N = true exponent of x
+}
+;;
+
+{ .mmi
+ ldfe FR_Q4 = [GR_ad_q],16 // Load Q4
+(p14) ldfe FR_1LN10_lo = [GR_ad_ln10] // If log10l, load 1/ln10_lo
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ldfe FR_Q3 = [GR_ad_q],16 // Load Q3
+ setf.sig FR_float_N = GR_N // Put integer N into rightmost significand
+ nop.i 999
+}
+;;
+
+{ .mmi
+ getf.exp GR_M = FR_W // Get signexp of w = x - 1
+ ldfe FR_Q2 = [GR_ad_q],16 // Load Q2
+ extr.u GR_Index2 = GR_X_1, 6, 4 // Extract bits 6-9 of X_1
+}
+;;
+
+{ .mmi
+ ldfe FR_Q1 = [GR_ad_q] // Load Q1
+ shladd GR_ad_z_2 = GR_Index2, 2, GR_ad_z_2 // Point to Z_2
+ add GR_ad_p2 = 0x30,GR_ad_p // Point to P_4
+}
+;;
+
+{ .mmi
+ ld4 GR_Z_2 = [GR_ad_z_2] // Load Z_2
+ shladd GR_ad_tbl_2 = GR_Index2, 4, GR_ad_tbl_2 // Point to G_2
+ and GR_M = GR_exp_mask, GR_M // Get exponent of w = x - 1
+}
+;;
+
+{ .mmi
+ ldfps FR_G2, FR_H2 = [GR_ad_tbl_2],8 // Load G_2, H_2
+ cmp.lt p8, p9 = GR_M, GR_exp_2tom7 // Test |x-1| < 2^-7
+ nop.i 999
+}
+;;
+
+// Paths are merged.
+// p8 is for the near1 path: |x-1| < 2^-7
+// p9 is for regular path: |x-1| >= 2^-7
+
+{ .mmi
+ ldfd FR_h2 = [GR_ad_tbl_2] // Load h_2
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p8) ldfe FR_P7 = [GR_ad_p],16 // Load P_7 for near1 path
+(p8) ldfe FR_P4 = [GR_ad_p2],16 // Load P_4 for near1 path
+(p9) pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // Get bits 30-15 of X_1 * Z_2
+}
+;;
+
+//
+// For performance, don't use result of pmpyshr2.u for 4 cycles.
+//
+{ .mmi
+(p8) ldfe FR_P6 = [GR_ad_p],16 // Load P_6 for near1 path
+(p8) ldfe FR_P3 = [GR_ad_p2],16 // Load P_3 for near1 path
+ nop.i 999
+}
+;;
+
+{ .mmf
+(p8) ldfe FR_P5 = [GR_ad_p],16 // Load P_5 for near1 path
+(p8) ldfe FR_P2 = [GR_ad_p2],16 // Load P_2 for near1 path
+(p8) fmpy.s1 FR_wsq = FR_W, FR_W // wsq = w * w for near1 path
+}
+;;
+
+{ .mmi
+(p8) ldfe FR_P1 = [GR_ad_p2],16 ;; // Load P_1 for near1 path
+ nop.m 999
+(p9) extr.u GR_Index3 = GR_X_2, 1, 5 // Extract bits 1-5 of X_2
+}
+;;
+
+{ .mfi
+(p9) shladd GR_ad_tbl_3 = GR_Index3, 4, GR_ad_tbl_3 // Point to G_3
+(p9) fcvt.xf FR_float_N = FR_float_N // Convert integer N to floating point
+ nop.i 999
+}
+;;
+
+{ .mfi
+(p9) ldfps FR_G3, FR_H3 = [GR_ad_tbl_3],8 // Load G_3, H_3
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+(p9) ldfd FR_h3 = [GR_ad_tbl_3] // Load h_3
+(p9) fmpy.s1 FR_G = FR_G, FR_G2 // G = G_1 * G_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FR_H = FR_H, FR_H2 // H = H_1 + H_2
+ nop.i 999
+}
+;;
+
+{ .mmf
+ nop.m 999
+ nop.m 999
+(p9) fadd.s1 FR_h = FR_h, FR_h2 // h = h_1 + h_2
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fmpy.s1 FR_w4 = FR_wsq, FR_wsq // w4 = w^4 for near1 path
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_p87 = FR_W, FR_P8, FR_P7 // p87 = w * P8 + P7
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_p43 = FR_W, FR_P4, FR_P3 // p43 = w * P4 + P3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fmpy.s1 FR_G = FR_G, FR_G3 // G = (G_1 * G_2) * G_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FR_H = FR_H, FR_H3 // H = (H_1 + H_2) + H_3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FR_h = FR_h, FR_h3 // h = (h_1 + h_2) + h_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fmpy.s1 FR_w6 = FR_w4, FR_wsq // w6 = w^6 for near1 path
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_p432 = FR_W, FR_p43, FR_P2 // p432 = w * p43 + P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_p876 = FR_W, FR_p87, FR_P6 // p876 = w * p87 + P6
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fms.s1 FR_r = FR_G, FR_S_hi, f1 // r = G * S_hi - 1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_Y_hi = FR_float_N, FR_log2_hi, FR_H // Y_hi = N * log2_hi + H
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_h = FR_float_N, FR_log2_lo, FR_h // h = N * log2_lo + h
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_p4321 = FR_W, FR_p432, FR_P1 // p4321 = w * p432 + P1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_p8765 = FR_W, FR_p876, FR_P5 // p8765 = w * p876 + P5
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_r, FR_Q4, FR_Q3 // poly_lo = r * Q4 + Q3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fmpy.s1 FR_rsq = FR_r, FR_r // rsq = r * r
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_Y_lo = FR_wsq, FR_p4321, f0 // Y_lo = wsq * p4321
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_Y_hi = FR_W, f1, f0 // Y_hi = w for near1 path
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_poly_lo, FR_r, FR_Q2 // poly_lo = poly_lo * r + Q2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_rcub = FR_rsq, FR_r, f0 // rcub = r^3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fma.s1 FR_Y_lo = FR_w6, FR_p8765,FR_Y_lo // Y_lo = w6 * p8765 + w2 * p4321
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_hi = FR_Q1, FR_rsq, FR_r // poly_hi = Q1 * rsq + r
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fma.s1 FR_poly_lo = FR_poly_lo, FR_rcub, FR_h // poly_lo = poly_lo*r^3 + h
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fadd.s1 FR_Y_lo = FR_poly_hi, FR_poly_lo // Y_lo = poly_hi + poly_lo
+ nop.i 999
+}
+;;
+
+// Remainder of code is common for near1 and regular paths
+{ .mfi
+ nop.m 999
+(p7) fadd.s0 f8 = FR_Y_lo,FR_Y_hi // If logl, result=Y_lo+Y_hi
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p14) fmpy.s1 FR_Output_X_tmp = FR_Y_lo,FR_1LN10_hi // If log10l, tmp = Y_lo * 1/ln10_hi
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fma.s1 FR_Output_X_tmp = FR_Y_hi,FR_1LN10_lo,FR_Output_X_tmp // tmp = Y_hi * 1/ln10_lo + tmp
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p14) fma.s0 f8 = FR_Y_hi,FR_1LN10_hi,FR_Output_X_tmp // If log10l, result = Y_hi * 1/ln10_hi + tmp
+ br.ret.sptk b0 // Common exit for 0 < x < inf
+}
+;;
+
+
+// Here if x=+-0
+LOGL_64_zero:
+//
+// If x=+-0 raise divide by zero and return -inf
+//
+{ .mfi
+(p7) mov GR_Parameter_TAG = 0 // Error tag used for logl with x=0
+ fsub.s1 FR_Output_X_tmp = f0, f1 // tmp = 0 - 1
+ nop.i 999
+}
+;;
+
+{ .mfb
+(p14) mov GR_Parameter_TAG = 6 // Error tag used for log10l with x=0
+ frcpa.s0 FR_Output_X_tmp, p8 = FR_Output_X_tmp, f0 // tmp / 0 in s0: source of the divide-by-zero flag
+ br.cond.sptk __libm_error_region // Hand x, tag and tmp to the error stub
+}
+;;
+
+LOGL_64_special:
+{ .mfi
+ nop.m 999
+ fclass.m.unc p8, p0 = FR_Input_X, 0x1E1 // Test for natval, nan, +inf
+ nop.i 999
+}
+;;
+
+//
+// For SNaN raise invalid and return QNaN.
+// For QNaN raise invalid and return QNaN.
+// For +Inf return +Inf.
+//
+{ .mfb
+ nop.m 999
+(p8) fmpy.s0 f8 = FR_Input_X, f1 // Result = x * 1.0
+(p8) br.ret.sptk b0 // Return for natval, nan, +inf
+}
+;;
+
+//
+// For -Inf raise invalid and return QNaN.
+//
+{ .mmi
+(p7) mov GR_Parameter_TAG = 1 // Error tag used for logl with x=-inf
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mfb
+(p14) mov GR_Parameter_TAG = 7 // Error tag used for log10l with x=-inf
+ fmpy.s0 FR_Output_X_tmp = FR_Input_X, f0 // tmp = x * 0 (x is -inf here)
+ br.cond.sptk __libm_error_region // Hand x, tag and tmp to the error stub
+}
+;;
+
+// Here if x denormal or unnormal: redo the extraction on the normalized copy
+LOGL_64_denormal:
+{ .mmi
+ getf.sig GR_signif = FR_X_Prime // Get significand of normalized input
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mmb
+ getf.exp GR_N = FR_X_Prime // Get exponent of normalized input
+ nop.m 999
+ br.cond.sptk LOGL_64_COMMON // Branch back to common code
+}
+;;
+
+LOGL_64_unsupported:
+//
+// Return generated NaN or other value.
+//
+{ .mfb
+ nop.m 999
+ fmpy.s0 f8 = FR_Input_X, f0 // Result = x * 0 for an unsupported encoding
+ br.ret.sptk b0 // Return directly; the error stub is not used
+}
+;;
+
+// Here if -inf < x < 0
+LOGL_64_negative:
+//
+// Deal with x < 0 in a special way - raise
+// invalid and produce QNaN indefinite.
+//
+{ .mfi
+(p7) mov GR_Parameter_TAG = 1 // Error tag used for logl with x<0
+ frcpa.s0 FR_Output_X_tmp, p8 = f0, f0 // tmp = 0/0 in s0
+ nop.i 999
+}
+;;
+
+{ .mib
+(p14) mov GR_Parameter_TAG = 7 // Error tag used for log10l with x<0
+ nop.i 999
+ br.cond.sptk __libm_error_region // Hand x, tag and tmp to the error stub
+}
+;;
+
+
+GLOBAL_IEEE754_END(log10l)
+libm_alias_ldouble_other (__log10, log10)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: old sp-32 (new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 at sp+32, then advance to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 999
+ nop.m 999
+ add GR_Parameter_RESULT = 48,sp // Recompute Parameter 3 address (sp+48)
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region#)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_pow.S b/sysdeps/ia64/fpu/e_pow.S
new file mode 100644
index 0000000000..ba6a7996d9
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_pow.S
@@ -0,0 +1,2302 @@
+.file "pow.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 02/03/00 Added p12 to definite over/under path. With odd power we did not
+// maintain the sign of x in this path.
+// 04/04/00 Unwind support added
+// 04/19/00 pow(+-1,inf) now returns NaN
+// pow(+-val, +-inf) returns 0 or inf, but now does not call error
+// support
+// Added s1 to fcvt.fx because invalid flag was incorrectly set.
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 09/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// and tweaking the critical path
+// 09/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 09/28/00 Updated NaN**0 path
+// 01/20/01 Fixed denormal flag settings.
+// 02/13/01 Improved speed.
+// 03/19/01 Reordered exp polynomial to improve speed and eliminate monotonicity
+// problem in round up, down, and to zero modes. Also corrected
+// overflow result when x negative, y odd in round up, down, zero.
+// 06/14/01 Added brace missing from bundle
+// 12/10/01 Corrected case where x negative, 2^52 <= |y| < 2^53, y odd integer.
+// 12/20/01 Fixed monotonicity problem in round to nearest.
+// 02/08/02 Fixed overflow/underflow cases that were not calling error support.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/29/02 Improved Itanium 2 performance
+// 09/21/02 Added branch for |y*log(x)|<2^-11 to fix monotonicity problems.
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//==============================================================
+// double pow(double x, double y)
+//
+// Overview of operation
+//==============================================================
+//
+// Three steps...
+// 1. Log(x)
+// 2. y Log(x)
+// 3. exp(y log(x))
+//
+// This means we work with the absolute value of x and merge in the sign later.
+// Log(x) = G + delta + r -rsq/2 + p
+// G,delta depend on the exponent of x and table entries. The table entries are
+// indexed by the exponent of x, called K.
+//
+// The G and delta come out of the reduction; r is the reduced x.
+//
+// B = frcpa(x)
+// xB-1 being small means that B is the approximate inverse of x.
+//
+// Log(x) = Log( (1/B)(Bx) )
+// = Log(1/B) + Log(Bx)
+// = Log(1/B) + Log( 1 + (Bx-1))
+//
+// x = 2^K 1.x_1x_2.....x_52
+// B = frcpa(x) = 2^-K Cm
+// Log(1/B) = Log(1/(2^-K Cm))
+// Log(1/B) = Log((2^K/ Cm))
+// Log(1/B) = K Log(2) + Log(1/Cm)
+//
+// Log(x) = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
+//
+// If you take the significand of x, set the exponent to true 0, then Cm is
+// the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
+// The frcpa table is indexed by 8 bits, the x_1 thru x_8.
+// m = x_1x_2...x_8 is an 8-bit index.
+//
+// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
+//
+// We tabulate as two doubles, T and t, where T +t is the value itself.
+//
+// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo + t) + Log( 1 + (Bx-1))
+// Log(x) = G + delta + Log( 1 + (Bx-1))
+//
+// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
+//
+// Log( 1 + (Bx-1)) = r - rsq/2 + p
+//
+// Then,
+//
+// yLog(x) = yG + y delta + y(r-rsq/2) + yp
+// yLog(x) = Z1 + e3 + Z2 + Z3 + (e1 + e2)
+//
+//
+// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+//
+//
+// exp(Z3) is another series.
+// exp(e1 + e2 + e3) is approximated as f3 = 1 + (e1 + e2 + e3)
+//
+// Z1 (128/log2) = number of log2/128 in Z1 is N1
+// Z2 (128/log2) = number of log2/128 in Z2 is N2
+//
+// s1 = Z1 - N1 log2/128
+// s2 = Z2 - N2 log2/128
+//
+// s = s1 + s2
+// N = N1 + N2
+//
+// exp(Z1 + Z2) = exp(Z)
+// exp(Z) = exp(s) exp(N log2/128)
+//
+// exp(r) = exp(Z - N log2/128)
+//
+// r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
+// = Z - N (log2/128)
+//
+// Z = s+d +N (log2/128)
+//
+// exp(Z) = exp(s) (1+d) exp(N log2/128)
+//
+// N = M 128 + n
+//
+// N log2/128 = M log2 + n log2/128
+//
+// n is 7 binary digits = n_7n_6...n_1
+//
+// n log2/128 = n_7n_6n_5 16 log2/128 + n_4n_3n_2n_1 log2/128
+// n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
+// n log2/128 = I2 log2/8 + I1 log2/128
+//
+// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+//
+// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
+// exp(Z) = exp(s) (1+d1) (1+d2)(2^M) 2^I2/8 2^I1/128
+// exp(Z) = exp(s) f1 f2 (2^M) 2^I2/8 2^I1/128
+//
+// I1, I2 are table indices. Use a series for exp(s).
+// Then get exp(Z)
+//
+// exp(yLog(x)) = exp(Z1 + Z2 + Z3) exp(e1 + e2 + e3)
+// exp(yLog(x)) = exp(Z) exp(Z3) f3
+// exp(yLog(x)) = exp(Z)f3 exp(Z3)
+// exp(yLog(x)) = A exp(Z3)
+//
+// We actually calculate exp(Z3) -1.
+// Then,
+// exp(yLog(x)) = A + A( exp(Z3) -1)
+//
+
+// Table Generation
+//==============================================================
+
+// The log values
+// ==============
+// The operation (K*log2_hi) must be exact. K is the true exponent of x.
+// If we allow gradual underflow (denormals), K can be represented in 12 bits
+// (as a two's complement number). We assume 13 bits as an engineering
+// precaution.
+//
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+// 0 1 66
+// 2 34
+//
+// So we want the lsb(log2_hi) to be 2^-50
+// We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
+//
+// 0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
+//
+// Consider numbering the bits left to right, starting at 0 thru 127.
+// Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
+//
+// ...79ab
+// 0111 1001 1010 1011
+// 44
+// 89
+//
+// So if we shift off the rightmost 14 bits, then (shift back only
+// the top half) we get
+//
+// 0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
+//
+// Put the right 64-bit significand in an FR register, convert to double;
+// it is exact. Put the next 128 bits into a quad register and round to double.
+// The true exponent of the low part is -51.
+//
+// hi is 0 fffe b17217f7d1cf4000
+// lo is 0 ffcc e6af278ece601000
+//
+// Convert to double memory format and get
+//
+// hi is 0x3fe62e42fefa39e8
+// lo is 0x3cccd5e4f1d9cc02
+//
+// log2_hi + log2_lo is an accurate value for log2.
+//
+//
+// The T and t values
+// ==================
+// A similar method is used to generate the T and t values.
+//
+// K * log2_hi + T must be exact.
+//
+// Smallest T,t
+// ----------
+// The smallest T,t is
+// T t
+// 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
+//
+// The exponent is 0x3f6 (biased) or -9 (true).
+// For the smallest T value, what we want is to clip the significand such that
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is
+// specific to the first entry. In general, it is 0xffff - (biased 15-bit
+// exponent).
+
+// Independently, what we have calculated is the table value as a quad
+// precision number.
+// Table entry 1 is
+// 0 fff6 80200aaeac44ef38 338f77605fdf8000
+//
+// We store this quad precision number in a data structure that is
+// sign: 1
+// exponent: 15
+// significand_hi: 64 (includes explicit bit)
+// significand_lo: 49
+// Because the explicit bit is included, the significand is 113 bits.
+//
+// Consider significand_hi for table entry 1.
+//
+//
+// +-+--- ... -------+--------------------+
+// | |
+// +-+--- ... -------+--------------------+
+// 0 1 4444444455555555556666
+// 2345678901234567890123
+//
+// Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
+// Bit 42 is 2^-42. If we shift to the right by 9, the bit in
+// bit 42 goes in 51.
+//
+// So what we want to do is shift bits 43 thru 63 into significand_lo.
+// This is shifting bit 42 into bit 63, taking care to retain shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//
+// The shift_value is 63-42 = 21. In general, this is
+// 63 - (51 -(0xffff - 0xfff6))
+// For this example, it is
+// 63 - (51 - 9) = 63 - 42 = 21
+//
+// This means we are shifting 21 bits into significand_lo. We must maintain more
+// than a 128-bit significand so as not to lose bits. So before the shift we put the
+// 128-bit significand into a 256-bit significand and then shift.
+// The 256-bit significand has four parts: hh, hl, lh, and ll.
+//
+// Start off with
+// hh hl lh ll
+// <64> <49><15_0> <64_0> <64_0>
+//
+// After shift by 21 (then return for significand_hi),
+// <43><21_0> <21><43> <6><58_0> <64_0>
+//
+// Take the hh part and convert to a double. There is no rounding here.
+// The conversion is exact. The true exponent of the high part is the same as
+// the true exponent of the input quad.
+//
+// We have some 64 plus significand bits for the low part. In this example, we
+// have 70 bits. We want to round this to a double. Put them in a quad and then
+// do a quad fnorm.
+// For this example the true exponent of the low part is
+// true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
+// In general, this is
+// true_exponent_of_high - (64 - shift_value)
+//
+//
+// Largest T,t
+// ----------
+// The largest T,t is
+// 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))=+6.92171e-001
+//
+// Table entry 256 is
+// 0 fffe b1321ff67cba178c 51da12f4df5a0000
+//
+// The shift value is
+// 63 - (51 -(0xffff - 0xfffe)) = 13
+//
+// The true exponent of the low part is
+// true_exponent_of_high - (64 - shift_value)
+// -1 - (64-13) = -52
+// Biased as a double, this is 0x3cb
+//
+//
+//
+// So then lsb(T) must be >= 2^-51
+// msb(Klog2_hi) <= 2^12
+//
+// +--------+---------+
+// | 51 bits | <== largest T
+// +--------+---------+
+// | 9 bits | 42 bits | <== smallest T
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+
+
+// Special Cases
+//==============================================================
+
+// double float
+// overflow error 24 30
+
+// underflow error 25 31
+
+// X zero Y zero
+// +0 +0 +1 error 26 32
+// -0 +0 +1 error 26 32
+// +0 -0 +1 error 26 32
+// -0 -0 +1 error 26 32
+
+// X zero Y negative
+// +0 -odd integer +inf error 27 33 divide-by-zero
+// -0 -odd integer -inf error 27 33 divide-by-zero
+// +0 !-odd integer +inf error 27 33 divide-by-zero
+// -0 !-odd integer +inf error 27 33 divide-by-zero
+// +0 -inf +inf error 27 33 divide-by-zero
+// -0 -inf +inf error 27 33 divide-by-zero
+
+// X zero Y positive
+// +0 +odd integer +0
+// -0 +odd integer -0
+// +0 !+odd integer +0
+// -0 !+odd integer +0
+// +0 +inf +0
+// -0 +inf +0
+// +0 Y NaN quiet Y invalid if Y SNaN
+// -0 Y NaN quiet Y invalid if Y SNaN
+
+// X one
+// -1 Y inf +1
+// -1 Y NaN quiet Y invalid if Y SNaN
+// +1 Y NaN +1 invalid if Y SNaN
+// +1 Y any else +1
+
+// X - Y not integer QNAN error 28 34 invalid
+
+// X NaN Y 0 +1 error 29 35
+// X NaN Y NaN quiet X invalid if X or Y SNaN
+// X NaN Y any else quiet X invalid if X SNaN
+// X !+1 Y NaN quiet Y invalid if Y SNaN
+
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf
+// X -inf Y >0, odd integer -inf
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X +inf Y =0 +1
+// X -inf Y =0 +1
+
+// |X|<1 Y +inf +0
+// |X|<1 Y -inf +inf
+// |X|>1 Y +inf +inf
+// |X|>1 Y -inf +0
+
+// X any Y =0 +1
+
+// Assembly macros
+//==============================================================
+
+// integer registers used (symbolic names for general registers; r50-r56 each carry two names, see the notes at the end of this list)
+
+pow_GR_signexp_X = r14
+pow_GR_17ones = r15
+pow_AD_P = r16
+pow_GR_exp_2tom8 = r17
+pow_GR_sig_X = r18
+pow_GR_10033 = r19
+pow_GR_16ones = r20
+
+pow_AD_Tt = r21
+pow_GR_exp_X = r22
+pow_AD_Q = r23
+pow_GR_true_exp_X = r24
+pow_GR_y_zero = r25
+
+pow_GR_exp_Y = r26
+pow_AD_tbl1 = r27
+pow_AD_tbl2 = r28
+pow_GR_offset = r29
+pow_GR_exp_Xm1 = r30
+pow_GR_xneg_yodd = r31
+
+pow_GR_signexp_Xm1 = r35
+pow_GR_int_W1 = r36
+pow_GR_int_W2 = r37
+pow_GR_int_N = r38
+pow_GR_index1 = r39
+pow_GR_index2 = r40
+
+pow_AD_T1 = r41
+pow_AD_T2 = r42
+pow_int_GR_M = r43
+pow_GR_sig_int_Y = r44
+pow_GR_sign_Y_Gpr = r45
+
+pow_GR_17ones_m1 = r46
+pow_GR_one = r47
+pow_GR_sign_Y = r48
+pow_GR_signexp_Y_Gpr = r49
+pow_GR_exp_Y_Gpr = r50
+
+pow_GR_true_exp_Y_Gpr = r51
+pow_GR_signexp_Y = r52
+pow_GR_x_one = r53
+pow_GR_exp_2toM63 = r54
+pow_GR_big_pos = r55
+
+pow_GR_big_neg = r56
+
+GR_SAVE_B0 = r50 // same register as pow_GR_exp_Y_Gpr
+GR_SAVE_GP = r51 // same register as pow_GR_true_exp_Y_Gpr
+GR_SAVE_PFS = r52 // same register as pow_GR_signexp_Y
+
+GR_Parameter_X = r53 // same register as pow_GR_x_one
+GR_Parameter_Y = r54 // same register as pow_GR_exp_2toM63
+GR_Parameter_RESULT = r55 // same register as pow_GR_big_pos
+pow_GR_tag = r56 // same register as pow_GR_big_neg
+
+
+// floating point registers used (several names match symbols in the overview comment: G, delta, Z1-Z3, e1-e3, f1-f3, s, A)
+
+POW_B = f32
+POW_NORM_X = f33
+POW_Xm1 = f34
+POW_r1 = f34 // same register as POW_Xm1
+POW_P4 = f35
+
+POW_P5 = f36
+POW_NORM_Y = f37
+POW_Q2 = f38
+POW_Q3 = f39
+POW_P2 = f40
+
+POW_P3 = f41
+POW_P0 = f42
+POW_log2_lo = f43
+POW_r = f44
+POW_Q0_half = f45
+
+POW_Q1 = f46
+POW_tmp = f47
+POW_log2_hi = f48
+POW_Q4 = f49
+POW_P1 = f50
+
+POW_log2_by_128_hi = f51
+POW_inv_log2_by_128 = f52
+POW_rsq = f53
+POW_Yrcub = f54
+POW_log2_by_128_lo = f55
+
+POW_v6 = f56
+POW_xsq = f57
+POW_v4 = f58
+POW_v2 = f59
+POW_T = f60
+
+POW_Tt = f61
+POW_RSHF = f62
+POW_v21ps = f63
+POW_s4 = f64
+POW_twoV = f65
+
+POW_U = f66
+POW_G = f67
+POW_delta = f68
+POW_v3 = f69
+POW_V = f70
+
+POW_p = f71
+POW_Z1 = f72
+POW_e3 = f73
+POW_e2 = f74
+POW_Z2 = f75
+
+POW_e1 = f76
+POW_W1 = f77
+POW_UmZ2 = f78
+POW_W2 = f79
+POW_Z3 = f80
+
+POW_int_W1 = f81
+POW_e12 = f82
+POW_int_W2 = f83
+POW_UmZ2pV = f84
+POW_Z3sq = f85
+
+POW_e123 = f86
+POW_N1float = f87
+POW_N2float = f88
+POW_f3 = f89
+POW_q = f90
+
+POW_s1 = f91
+POW_Nfloat = f92
+POW_s2 = f93
+POW_f2 = f94
+POW_f1 = f95
+
+POW_T1 = f96
+POW_T2 = f97
+POW_2M = f98
+POW_s = f99
+POW_f12 = f100
+
+POW_ssq = f101
+POW_T1T2 = f102
+POW_1ps = f103
+POW_A = f104
+POW_es = f105
+
+POW_Xp1 = f106
+POW_int_K = f107
+POW_K = f108
+POW_f123 = f109
+POW_Gpr = f110
+
+POW_Y_Gpr = f111
+POW_int_Y = f112
+POW_abs_q = f114 // note: f113 is not given a name in this list
+POW_2toM63 = f115
+
+POW_float_int_Y = f116
+POW_ftz_urm_f8 = f117
+POW_wre_urm_f8 = f118
+POW_big_neg = f119
+POW_big_pos = f120
+
+POW_GY_Z2 = f121
+POW_pYrcub_e3 = f122
+POW_d = f123
+POW_d2 = f124
+POW_poly_d_hi = f121 // same register as POW_GY_Z2
+POW_poly_d_lo = f122 // same register as POW_pYrcub_e3
+POW_poly_d = f121 // same register as POW_GY_Z2 and POW_poly_d_hi
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(pow_table_P)
+data8 0x8000F7B249FF332D, 0x0000BFFC // P_5 (~ -1/8 if the pair is read as 64-bit significand + sign/exponent word -- NOTE(review): confirm against the loads)
+data8 0xAAAAAAA9E7902C7F, 0x0000BFFC // P_3 (~ -1/6 under the same reading)
+data8 0x80000000000018E5, 0x0000BFFD // P_1 (~ -1/4 under the same reading)
+data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128 (~ 128/ln2 = 184.66 under the same reading)
+//
+// The single-word entries below decode as IEEE doubles.
+data8 0x3FA5555555554A9E // Q_2 (~ 1/24)
+data8 0x3F8111124F4DD9F9 // Q_3 (~ 1/120)
+data8 0x3FE0000000000000 // Q_0 (exactly 0.5)
+data8 0x3FC5555555554733 // Q_1 (~ 1/6)
+data8 0x3F56C16D9360FFA0 // Q_4 (~ 1/720)
+data8 0x43e8000000000000 // Right shift constant for exp (decodes to 1.5 * 2^63 as an IEEE double)
+data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo (significand equals the low 64 bits of the log2 value quoted in the header)
+data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
+data8 0x0000000000000000 // pad to eliminate bank conflicts with pow_table_Q
+LOCAL_OBJECT_END(pow_table_P)
+
+LOCAL_OBJECT_START(pow_table_Q)
+data8 0x9249FE7F0DC423CF, 0x00003FFC // P_4 (~ 1/7 if the pair is read as 64-bit significand + sign/exponent word -- NOTE(review): confirm against the loads)
+data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2 (~ 1/5 under the same reading)
+data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0 (~ 1/3 under the same reading)
+data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001 (two IEEE doubles; same hi/lo values as derived in the header comment)
+data8 0xb17217f7d1cf79ab, 0x00003ff7 // ln2_by_128_hi (log2 significand from the header with exponent word 0x3ffe - 7)
+LOCAL_OBJECT_END(pow_table_Q)
+
+
+LOCAL_OBJECT_START(pow_Tt)
+data8 0x3f60040155d58800, 0x3c93bce0ce3ddd81 // log(1/frcpa(1+0/256))= +1.95503e-003
+data8 0x3f78121214586a00, 0x3cb540e0a5cfc9bc // log(1/frcpa(1+1/256))= +5.87661e-003
+data8 0x3f841929f9683200, 0x3cbdf1d57404da1f // log(1/frcpa(1+2/256))= +9.81362e-003
+data8 0x3f8c317384c75f00, 0x3c69806208c04c22 // log(1/frcpa(1+3/256))= +1.37662e-002
+data8 0x3f91a6b91ac73380, 0x3c7874daa716eb32 // log(1/frcpa(1+4/256))= +1.72376e-002
+data8 0x3f95ba9a5d9ac000, 0x3cacbb84e08d78ac // log(1/frcpa(1+5/256))= +2.12196e-002
+data8 0x3f99d2a807432580, 0x3cbcf80538b441e1 // log(1/frcpa(1+6/256))= +2.52177e-002
+data8 0x3f9d6b2725979800, 0x3c6095e5c8f8f359 // log(1/frcpa(1+7/256))= +2.87291e-002
+data8 0x3fa0c58fa19dfa80, 0x3cb4c5d4e9d0dda2 // log(1/frcpa(1+8/256))= +3.27573e-002
+data8 0x3fa2954c78cbce00, 0x3caa932b860ab8d6 // log(1/frcpa(1+9/256))= +3.62953e-002
+data8 0x3fa4a94d2da96c40, 0x3ca670452b76bbd5 // log(1/frcpa(1+10/256))= +4.03542e-002
+data8 0x3fa67c94f2d4bb40, 0x3ca84104f9941798 // log(1/frcpa(1+11/256))= +4.39192e-002
+data8 0x3fa85188b630f040, 0x3cb40a882cbf0153 // log(1/frcpa(1+12/256))= +4.74971e-002
+data8 0x3faa6b8abe73af40, 0x3c988d46e25c9059 // log(1/frcpa(1+13/256))= +5.16017e-002
+data8 0x3fac441e06f72a80, 0x3cae3e930a1a2a96 // log(1/frcpa(1+14/256))= +5.52072e-002
+data8 0x3fae1e6713606d00, 0x3c8a796f6283b580 // log(1/frcpa(1+15/256))= +5.88257e-002
+data8 0x3faffa6911ab9300, 0x3c5193070351e88a // log(1/frcpa(1+16/256))= +6.24574e-002
+data8 0x3fb0ec139c5da600, 0x3c623f2a75eb992d // log(1/frcpa(1+17/256))= +6.61022e-002
+data8 0x3fb1dbd2643d1900, 0x3ca649b2ef8927f0 // log(1/frcpa(1+18/256))= +6.97605e-002
+data8 0x3fb2cc7284fe5f00, 0x3cbc5e86599513e2 // log(1/frcpa(1+19/256))= +7.34321e-002
+data8 0x3fb3bdf5a7d1ee60, 0x3c90bd4bb69dada3 // log(1/frcpa(1+20/256))= +7.71173e-002
+data8 0x3fb4b05d7aa012e0, 0x3c54e377c9b8a54f // log(1/frcpa(1+21/256))= +8.08161e-002
+data8 0x3fb580db7ceb5700, 0x3c7fdb2f98354cde // log(1/frcpa(1+22/256))= +8.39975e-002
+data8 0x3fb674f089365a60, 0x3cb9994c9d3301c1 // log(1/frcpa(1+23/256))= +8.77219e-002
+data8 0x3fb769ef2c6b5680, 0x3caaec639db52a79 // log(1/frcpa(1+24/256))= +9.14602e-002
+data8 0x3fb85fd927506a40, 0x3c9f9f99a3cf8e25 // log(1/frcpa(1+25/256))= +9.52125e-002
+data8 0x3fb9335e5d594980, 0x3ca15c3abd47d99a // log(1/frcpa(1+26/256))= +9.84401e-002
+data8 0x3fba2b0220c8e5e0, 0x3cb4ca639adf6fc3 // log(1/frcpa(1+27/256))= +1.02219e-001
+data8 0x3fbb0004ac1a86a0, 0x3ca7cb81bf959a59 // log(1/frcpa(1+28/256))= +1.05469e-001
+data8 0x3fbbf968769fca00, 0x3cb0c646c121418e // log(1/frcpa(1+29/256))= +1.09274e-001
+data8 0x3fbccfedbfee13a0, 0x3ca0465fce24ab4b // log(1/frcpa(1+30/256))= +1.12548e-001
+data8 0x3fbda727638446a0, 0x3c82803f4e2e6603 // log(1/frcpa(1+31/256))= +1.15832e-001
+data8 0x3fbea3257fe10f60, 0x3cb986a3f2313d1a // log(1/frcpa(1+32/256))= +1.19677e-001
+data8 0x3fbf7be9fedbfde0, 0x3c97d16a6a621cf4 // log(1/frcpa(1+33/256))= +1.22985e-001
+data8 0x3fc02ab352ff25f0, 0x3c9cc6baad365600 // log(1/frcpa(1+34/256))= +1.26303e-001
+data8 0x3fc097ce579d2040, 0x3cb9ba16d329440b // log(1/frcpa(1+35/256))= +1.29633e-001
+data8 0x3fc1178e8227e470, 0x3cb7bc671683f8e6 // log(1/frcpa(1+36/256))= +1.33531e-001
+data8 0x3fc185747dbecf30, 0x3c9d1116f66d2345 // log(1/frcpa(1+37/256))= +1.36885e-001
+data8 0x3fc1f3b925f25d40, 0x3c8162c9ef939ac6 // log(1/frcpa(1+38/256))= +1.40250e-001
+data8 0x3fc2625d1e6ddf50, 0x3caad3a1ec384fc3 // log(1/frcpa(1+39/256))= +1.43627e-001
+data8 0x3fc2d1610c868130, 0x3cb3ad997036941b // log(1/frcpa(1+40/256))= +1.47015e-001
+data8 0x3fc340c597411420, 0x3cbc2308262c7998 // log(1/frcpa(1+41/256))= +1.50414e-001
+data8 0x3fc3b08b6757f2a0, 0x3cb2170d6cdf0526 // log(1/frcpa(1+42/256))= +1.53825e-001
+data8 0x3fc40dfb08378000, 0x3c9bb453c4f7b685 // log(1/frcpa(1+43/256))= +1.56677e-001
+data8 0x3fc47e74e8ca5f70, 0x3cb836a48fdfce9d // log(1/frcpa(1+44/256))= +1.60109e-001
+data8 0x3fc4ef51f6466de0, 0x3ca07a43919aa64b // log(1/frcpa(1+45/256))= +1.63553e-001
+data8 0x3fc56092e02ba510, 0x3ca85006899d97b0 // log(1/frcpa(1+46/256))= +1.67010e-001
+data8 0x3fc5d23857cd74d0, 0x3ca30a5ba6e7abbe // log(1/frcpa(1+47/256))= +1.70478e-001
+data8 0x3fc6313a37335d70, 0x3ca905586f0ac97e // log(1/frcpa(1+48/256))= +1.73377e-001
+data8 0x3fc6a399dabbd380, 0x3c9b2c6657a96684 // log(1/frcpa(1+49/256))= +1.76868e-001
+data8 0x3fc70337dd3ce410, 0x3cb50bc52f55cdd8 // log(1/frcpa(1+50/256))= +1.79786e-001
+data8 0x3fc77654128f6120, 0x3cad2eb7c9a39efe // log(1/frcpa(1+51/256))= +1.83299e-001
+data8 0x3fc7e9d82a0b0220, 0x3cba127e90393c01 // log(1/frcpa(1+52/256))= +1.86824e-001
+data8 0x3fc84a6b759f5120, 0x3cbd7fd52079f706 // log(1/frcpa(1+53/256))= +1.89771e-001
+data8 0x3fc8ab47d5f5a300, 0x3cbfae141751a3de // log(1/frcpa(1+54/256))= +1.92727e-001
+data8 0x3fc91fe490965810, 0x3cb69cf30a1c319e // log(1/frcpa(1+55/256))= +1.96286e-001
+data8 0x3fc981634011aa70, 0x3ca5bb3d208bc42a // log(1/frcpa(1+56/256))= +1.99261e-001
+data8 0x3fc9f6c407089660, 0x3ca04d68658179a0 // log(1/frcpa(1+57/256))= +2.02843e-001
+data8 0x3fca58e729348f40, 0x3c99f5411546c286 // log(1/frcpa(1+58/256))= +2.05838e-001
+data8 0x3fcabb55c31693a0, 0x3cb9a5350eb327d5 // log(1/frcpa(1+59/256))= +2.08842e-001
+data8 0x3fcb1e104919efd0, 0x3c18965fcce7c406 // log(1/frcpa(1+60/256))= +2.11855e-001
+data8 0x3fcb94ee93e367c0, 0x3cb503716da45184 // log(1/frcpa(1+61/256))= +2.15483e-001
+data8 0x3fcbf851c0675550, 0x3cbdf1b3f7ab5378 // log(1/frcpa(1+62/256))= +2.18516e-001
+data8 0x3fcc5c0254bf23a0, 0x3ca7aab9ed0b1d7b // log(1/frcpa(1+63/256))= +2.21558e-001
+data8 0x3fccc000c9db3c50, 0x3c92a7a2a850072a // log(1/frcpa(1+64/256))= +2.24609e-001
+data8 0x3fcd244d99c85670, 0x3c9f6019120edf4c // log(1/frcpa(1+65/256))= +2.27670e-001
+data8 0x3fcd88e93fb2f450, 0x3c6affb96815e081 // log(1/frcpa(1+66/256))= +2.30741e-001
+data8 0x3fcdedd437eaef00, 0x3c72553595897976 // log(1/frcpa(1+67/256))= +2.33820e-001
+data8 0x3fce530effe71010, 0x3c90913b020fa182 // log(1/frcpa(1+68/256))= +2.36910e-001
+data8 0x3fceb89a1648b970, 0x3c837ba4045bfd25 // log(1/frcpa(1+69/256))= +2.40009e-001
+data8 0x3fcf1e75fadf9bd0, 0x3cbcea6d13e0498d // log(1/frcpa(1+70/256))= +2.43117e-001
+data8 0x3fcf84a32ead7c30, 0x3ca5e3a67b3c6d77 // log(1/frcpa(1+71/256))= +2.46235e-001
+data8 0x3fcfeb2233ea07c0, 0x3cba0c6f0049c5a6 // log(1/frcpa(1+72/256))= +2.49363e-001
+data8 0x3fd028f9c7035c18, 0x3cb0a30b06677ff6 // log(1/frcpa(1+73/256))= +2.52501e-001
+data8 0x3fd05c8be0d96358, 0x3ca0f1c77ccb5865 // log(1/frcpa(1+74/256))= +2.55649e-001
+data8 0x3fd085eb8f8ae790, 0x3cbd513f45fe7a97 // log(1/frcpa(1+75/256))= +2.58174e-001
+data8 0x3fd0b9c8e32d1910, 0x3c927449047ca006 // log(1/frcpa(1+76/256))= +2.61339e-001
+data8 0x3fd0edd060b78080, 0x3c89b52d8435f53e // log(1/frcpa(1+77/256))= +2.64515e-001
+data8 0x3fd122024cf00638, 0x3cbdd976fabda4bd // log(1/frcpa(1+78/256))= +2.67701e-001
+data8 0x3fd14be2927aecd0, 0x3cb02f90ad0bc471 // log(1/frcpa(1+79/256))= +2.70257e-001
+data8 0x3fd180618ef18ad8, 0x3cbd003792c71a98 // log(1/frcpa(1+80/256))= +2.73461e-001
+data8 0x3fd1b50bbe2fc638, 0x3ca9ae64c6403ead // log(1/frcpa(1+81/256))= +2.76675e-001
+data8 0x3fd1df4cc7cf2428, 0x3cb43f0455f7e395 // log(1/frcpa(1+82/256))= +2.79254e-001
+data8 0x3fd214456d0eb8d0, 0x3cb0fbd748d75d30 // log(1/frcpa(1+83/256))= +2.82487e-001
+data8 0x3fd23ec5991eba48, 0x3c906edd746b77e2 // log(1/frcpa(1+84/256))= +2.85081e-001
+data8 0x3fd2740d9f870af8, 0x3ca9802e6a00a670 // log(1/frcpa(1+85/256))= +2.88333e-001
+data8 0x3fd29ecdabcdfa00, 0x3cacecef70890cfa // log(1/frcpa(1+86/256))= +2.90943e-001
+data8 0x3fd2d46602adcce8, 0x3cb97911955f3521 // log(1/frcpa(1+87/256))= +2.94214e-001
+data8 0x3fd2ff66b04ea9d0, 0x3cb12dabe191d1c9 // log(1/frcpa(1+88/256))= +2.96838e-001
+data8 0x3fd335504b355a30, 0x3cbdf9139df924ec // log(1/frcpa(1+89/256))= +3.00129e-001
+data8 0x3fd360925ec44f58, 0x3cb253e68977a1e3 // log(1/frcpa(1+90/256))= +3.02769e-001
+data8 0x3fd38bf1c3337e70, 0x3cb3d283d2a2da21 // log(1/frcpa(1+91/256))= +3.05417e-001
+data8 0x3fd3c25277333180, 0x3cadaa5b035eae27 // log(1/frcpa(1+92/256))= +3.08735e-001
+data8 0x3fd3edf463c16838, 0x3cb983d680d3c108 // log(1/frcpa(1+93/256))= +3.11399e-001
+data8 0x3fd419b423d5e8c0, 0x3cbc86dd921c139d // log(1/frcpa(1+94/256))= +3.14069e-001
+data8 0x3fd44591e0539f48, 0x3c86a76d6dc2782e // log(1/frcpa(1+95/256))= +3.16746e-001
+data8 0x3fd47c9175b6f0a8, 0x3cb59a2e013c6b5f // log(1/frcpa(1+96/256))= +3.20103e-001
+data8 0x3fd4a8b341552b08, 0x3c93f1e86e468694 // log(1/frcpa(1+97/256))= +3.22797e-001
+data8 0x3fd4d4f390890198, 0x3cbf5e4ea7c5105a // log(1/frcpa(1+98/256))= +3.25498e-001
+data8 0x3fd501528da1f960, 0x3cbf58da53e9ad10 // log(1/frcpa(1+99/256))= +3.28206e-001
+data8 0x3fd52dd06347d4f0, 0x3cb98a28cebf6eef // log(1/frcpa(1+100/256))= +3.30921e-001
+data8 0x3fd55a6d3c7b8a88, 0x3c9c76b67c2d1fd4 // log(1/frcpa(1+101/256))= +3.33644e-001
+data8 0x3fd5925d2b112a58, 0x3c9029616a4331b8 // log(1/frcpa(1+102/256))= +3.37058e-001
+data8 0x3fd5bf406b543db0, 0x3c9fb8292ecfc820 // log(1/frcpa(1+103/256))= +3.39798e-001
+data8 0x3fd5ec433d5c35a8, 0x3cb71a1229d17eec // log(1/frcpa(1+104/256))= +3.42545e-001
+data8 0x3fd61965cdb02c18, 0x3cbba94fe1dbb8d2 // log(1/frcpa(1+105/256))= +3.45300e-001
+data8 0x3fd646a84935b2a0, 0x3c9ee496d2c9ae57 // log(1/frcpa(1+106/256))= +3.48063e-001
+data8 0x3fd6740add31de90, 0x3cb1da3a6c7a9dfd // log(1/frcpa(1+107/256))= +3.50833e-001
+data8 0x3fd6a18db74a58c0, 0x3cb494c257add8dc // log(1/frcpa(1+108/256))= +3.53610e-001
+data8 0x3fd6cf31058670e8, 0x3cb0b244a70a8da9 // log(1/frcpa(1+109/256))= +3.56396e-001
+data8 0x3fd6f180e852f0b8, 0x3c9db7aefa866720 // log(1/frcpa(1+110/256))= +3.58490e-001
+data8 0x3fd71f5d71b894e8, 0x3cbe91c4bf324957 // log(1/frcpa(1+111/256))= +3.61289e-001
+data8 0x3fd74d5aefd66d58, 0x3cb06b3d9bfac023 // log(1/frcpa(1+112/256))= +3.64096e-001
+data8 0x3fd77b79922bd378, 0x3cb727d8804491f4 // log(1/frcpa(1+113/256))= +3.66911e-001
+data8 0x3fd7a9b9889f19e0, 0x3ca2ef22df5bc543 // log(1/frcpa(1+114/256))= +3.69734e-001
+data8 0x3fd7d81b037eb6a0, 0x3cb8fd3ba07a7ece // log(1/frcpa(1+115/256))= +3.72565e-001
+data8 0x3fd8069e33827230, 0x3c8bd1e25866e61a // log(1/frcpa(1+116/256))= +3.75404e-001
+data8 0x3fd82996d3ef8bc8, 0x3ca5aab9f5928928 // log(1/frcpa(1+117/256))= +3.77538e-001
+data8 0x3fd85855776dcbf8, 0x3ca56f33337789d6 // log(1/frcpa(1+118/256))= +3.80391e-001
+data8 0x3fd8873658327cc8, 0x3cbb8ef0401db49d // log(1/frcpa(1+119/256))= +3.83253e-001
+data8 0x3fd8aa75973ab8c8, 0x3cbb9961f509a680 // log(1/frcpa(1+120/256))= +3.85404e-001
+data8 0x3fd8d992dc8824e0, 0x3cb220512a53732d // log(1/frcpa(1+121/256))= +3.88280e-001
+data8 0x3fd908d2ea7d9510, 0x3c985f0e513bfb5c // log(1/frcpa(1+122/256))= +3.91164e-001
+data8 0x3fd92c59e79c0e50, 0x3cb82e073fd30d63 // log(1/frcpa(1+123/256))= +3.93332e-001
+data8 0x3fd95bd750ee3ed0, 0x3ca4aa7cdb6dd8a8 // log(1/frcpa(1+124/256))= +3.96231e-001
+data8 0x3fd98b7811a3ee58, 0x3caa93a5b660893e // log(1/frcpa(1+125/256))= +3.99138e-001
+data8 0x3fd9af47f33d4068, 0x3cac294b3b3190ba // log(1/frcpa(1+126/256))= +4.01323e-001
+data8 0x3fd9df270c1914a0, 0x3cbe1a58fd0cd67e // log(1/frcpa(1+127/256))= +4.04245e-001
+data8 0x3fda0325ed14fda0, 0x3cb1efa7950fb57e // log(1/frcpa(1+128/256))= +4.06442e-001
+data8 0x3fda33440224fa78, 0x3c8915fe75e7d477 // log(1/frcpa(1+129/256))= +4.09379e-001
+data8 0x3fda57725e80c380, 0x3ca72bd1062b1b7f // log(1/frcpa(1+130/256))= +4.11587e-001
+data8 0x3fda87d0165dd198, 0x3c91f7845f58dbad // log(1/frcpa(1+131/256))= +4.14539e-001
+data8 0x3fdaac2e6c03f890, 0x3cb6f237a911c509 // log(1/frcpa(1+132/256))= +4.16759e-001
+data8 0x3fdadccc6fdf6a80, 0x3c90ddc4b7687169 // log(1/frcpa(1+133/256))= +4.19726e-001
+data8 0x3fdb015b3eb1e790, 0x3c692dd7d90e1e8e // log(1/frcpa(1+134/256))= +4.21958e-001
+data8 0x3fdb323a3a635948, 0x3c6f85655cbe14de // log(1/frcpa(1+135/256))= +4.24941e-001
+data8 0x3fdb56fa04462908, 0x3c95252d841994de // log(1/frcpa(1+136/256))= +4.27184e-001
+data8 0x3fdb881aa659bc90, 0x3caa53a745a3642f // log(1/frcpa(1+137/256))= +4.30182e-001
+data8 0x3fdbad0bef3db160, 0x3cb32f2540dcc16a // log(1/frcpa(1+138/256))= +4.32437e-001
+data8 0x3fdbd21297781c28, 0x3cbd8e891e106f1d // log(1/frcpa(1+139/256))= +4.34697e-001
+data8 0x3fdc039236f08818, 0x3c809435af522ba7 // log(1/frcpa(1+140/256))= +4.37718e-001
+data8 0x3fdc28cb1e4d32f8, 0x3cb3944752fbd81e // log(1/frcpa(1+141/256))= +4.39990e-001
+data8 0x3fdc4e19b84723c0, 0x3c9a465260cd3fe5 // log(1/frcpa(1+142/256))= +4.42267e-001
+data8 0x3fdc7ff9c74554c8, 0x3c92447d5b6ca369 // log(1/frcpa(1+143/256))= +4.45311e-001
+data8 0x3fdca57b64e9db00, 0x3cb44344a8a00c82 // log(1/frcpa(1+144/256))= +4.47600e-001
+data8 0x3fdccb130a5ceba8, 0x3cbefaddfb97b73f // log(1/frcpa(1+145/256))= +4.49895e-001
+data8 0x3fdcf0c0d18f3268, 0x3cbd3e7bfee57898 // log(1/frcpa(1+146/256))= +4.52194e-001
+data8 0x3fdd232075b5a200, 0x3c9222599987447c // log(1/frcpa(1+147/256))= +4.55269e-001
+data8 0x3fdd490246defa68, 0x3cabafe9a767a80d // log(1/frcpa(1+148/256))= +4.57581e-001
+data8 0x3fdd6efa918d25c8, 0x3cb58a2624e1c6fd // log(1/frcpa(1+149/256))= +4.59899e-001
+data8 0x3fdd9509707ae528, 0x3cbdc3babce578e7 // log(1/frcpa(1+150/256))= +4.62221e-001
+data8 0x3fddbb2efe92c550, 0x3cb0ac0943c434a4 // log(1/frcpa(1+151/256))= +4.64550e-001
+data8 0x3fddee2f3445e4a8, 0x3cbba9d07ce820e8 // log(1/frcpa(1+152/256))= +4.67663e-001
+data8 0x3fde148a1a2726c8, 0x3cb6537e3375b205 // log(1/frcpa(1+153/256))= +4.70004e-001
+data8 0x3fde3afc0a49ff38, 0x3cbfed5518dbc20e // log(1/frcpa(1+154/256))= +4.72350e-001
+data8 0x3fde6185206d5168, 0x3cb6572601f73d5c // log(1/frcpa(1+155/256))= +4.74702e-001
+data8 0x3fde882578823d50, 0x3c9b24abd4584d1a // log(1/frcpa(1+156/256))= +4.77060e-001
+data8 0x3fdeaedd2eac9908, 0x3cb0ceb5e4d2c8f7 // log(1/frcpa(1+157/256))= +4.79423e-001
+data8 0x3fded5ac5f436be0, 0x3ca72f21f1f5238e // log(1/frcpa(1+158/256))= +4.81792e-001
+data8 0x3fdefc9326d16ab8, 0x3c85081a1639a45c // log(1/frcpa(1+159/256))= +4.84166e-001
+data8 0x3fdf2391a21575f8, 0x3cbf11015bdd297a // log(1/frcpa(1+160/256))= +4.86546e-001
+data8 0x3fdf4aa7ee031928, 0x3cb3795bc052a2d1 // log(1/frcpa(1+161/256))= +4.88932e-001
+data8 0x3fdf71d627c30bb0, 0x3c35c61f0f5a88f3 // log(1/frcpa(1+162/256))= +4.91323e-001
+data8 0x3fdf991c6cb3b378, 0x3c97d99419be6028 // log(1/frcpa(1+163/256))= +4.93720e-001
+data8 0x3fdfc07ada69a908, 0x3cbfe9341ded70b1 // log(1/frcpa(1+164/256))= +4.96123e-001
+data8 0x3fdfe7f18eb03d38, 0x3cb85718a640c33f // log(1/frcpa(1+165/256))= +4.98532e-001
+data8 0x3fe007c053c5002c, 0x3cb3addc9c065f09 // log(1/frcpa(1+166/256))= +5.00946e-001
+data8 0x3fe01b942198a5a0, 0x3c9d5aa4c77da6ac // log(1/frcpa(1+167/256))= +5.03367e-001
+data8 0x3fe02f74400c64e8, 0x3cb5a0ee4450ef52 // log(1/frcpa(1+168/256))= +5.05793e-001
+data8 0x3fe04360be7603ac, 0x3c9dd00c35630fe0 // log(1/frcpa(1+169/256))= +5.08225e-001
+data8 0x3fe05759ac47fe30, 0x3cbd063e1f0bd82c // log(1/frcpa(1+170/256))= +5.10663e-001
+data8 0x3fe06b5f1911cf50, 0x3cae8da674af5289 // log(1/frcpa(1+171/256))= +5.13107e-001
+data8 0x3fe078bf0533c568, 0x3c62241edf5fd1f7 // log(1/frcpa(1+172/256))= +5.14740e-001
+data8 0x3fe08cd9687e7b0c, 0x3cb3007febcca227 // log(1/frcpa(1+173/256))= +5.17194e-001
+data8 0x3fe0a10074cf9018, 0x3ca496e84603816b // log(1/frcpa(1+174/256))= +5.19654e-001
+data8 0x3fe0b5343a234474, 0x3cb46098d14fc90a // log(1/frcpa(1+175/256))= +5.22120e-001
+data8 0x3fe0c974c89431cc, 0x3cac0a7cdcbb86c6 // log(1/frcpa(1+176/256))= +5.24592e-001
+data8 0x3fe0ddc2305b9884, 0x3cb2f753210410ff // log(1/frcpa(1+177/256))= +5.27070e-001
+data8 0x3fe0eb524bafc918, 0x3c88affd6682229e // log(1/frcpa(1+178/256))= +5.28726e-001
+data8 0x3fe0ffb54213a474, 0x3cadeefbab9af993 // log(1/frcpa(1+179/256))= +5.31214e-001
+data8 0x3fe114253da97d9c, 0x3cbaf1c2b8bc160a // log(1/frcpa(1+180/256))= +5.33709e-001
+data8 0x3fe128a24f1d9afc, 0x3cb9cf4df375e650 // log(1/frcpa(1+181/256))= +5.36210e-001
+data8 0x3fe1365252bf0864, 0x3c985a621d4be111 // log(1/frcpa(1+182/256))= +5.37881e-001
+data8 0x3fe14ae558b4a92c, 0x3ca104c4aa8977d1 // log(1/frcpa(1+183/256))= +5.40393e-001
+data8 0x3fe15f85a19c7658, 0x3cbadf26e540f375 // log(1/frcpa(1+184/256))= +5.42910e-001
+data8 0x3fe16d4d38c119f8, 0x3cb3aea11caec416 // log(1/frcpa(1+185/256))= +5.44592e-001
+data8 0x3fe18203c20dd130, 0x3cba82d1211d1d6d // log(1/frcpa(1+186/256))= +5.47121e-001
+data8 0x3fe196c7bc4b1f38, 0x3cb6267acc4f4f4a // log(1/frcpa(1+187/256))= +5.49656e-001
+data8 0x3fe1a4a738b7a33c, 0x3c858930213c987d // log(1/frcpa(1+188/256))= +5.51349e-001
+data8 0x3fe1b981c0c9653c, 0x3c9bc2a4a30f697b // log(1/frcpa(1+189/256))= +5.53895e-001
+data8 0x3fe1ce69e8bb1068, 0x3cb7ae6199cf2a00 // log(1/frcpa(1+190/256))= +5.56447e-001
+data8 0x3fe1dc619de06944, 0x3c6b50bb38388177 // log(1/frcpa(1+191/256))= +5.58152e-001
+data8 0x3fe1f160a2ad0da0, 0x3cbd05b2778a5e1d // log(1/frcpa(1+192/256))= +5.60715e-001
+data8 0x3fe2066d7740737c, 0x3cb32e828f9c6bd6 // log(1/frcpa(1+193/256))= +5.63285e-001
+data8 0x3fe2147dba47a390, 0x3cbd579851b8b672 // log(1/frcpa(1+194/256))= +5.65001e-001
+data8 0x3fe229a1bc5ebac0, 0x3cbb321be5237ce8 // log(1/frcpa(1+195/256))= +5.67582e-001
+data8 0x3fe237c1841a502c, 0x3cb3b56e0915ea64 // log(1/frcpa(1+196/256))= +5.69306e-001
+data8 0x3fe24cfce6f80d98, 0x3cb34a4d1a422919 // log(1/frcpa(1+197/256))= +5.71898e-001
+data8 0x3fe25b2c55cd5760, 0x3cb237401ea5015e // log(1/frcpa(1+198/256))= +5.73630e-001
+data8 0x3fe2707f4d5f7c40, 0x3c9d30f20acc8341 // log(1/frcpa(1+199/256))= +5.76233e-001
+data8 0x3fe285e0842ca380, 0x3cbc4d866d5f21c0 // log(1/frcpa(1+200/256))= +5.78842e-001
+data8 0x3fe294294708b770, 0x3cb85e14d5dc54fa // log(1/frcpa(1+201/256))= +5.80586e-001
+data8 0x3fe2a9a2670aff0c, 0x3c7e6f8f468bbf91 // log(1/frcpa(1+202/256))= +5.83207e-001
+data8 0x3fe2b7fb2c8d1cc0, 0x3c930ffcf63c8b65 // log(1/frcpa(1+203/256))= +5.84959e-001
+data8 0x3fe2c65a6395f5f4, 0x3ca0afe20b53d2d2 // log(1/frcpa(1+204/256))= +5.86713e-001
+data8 0x3fe2dbf557b0df40, 0x3cb646be1188fbc9 // log(1/frcpa(1+205/256))= +5.89350e-001
+data8 0x3fe2ea64c3f97654, 0x3c96516fa8df33b2 // log(1/frcpa(1+206/256))= +5.91113e-001
+data8 0x3fe3001823684d70, 0x3cb96d64e16d1360 // log(1/frcpa(1+207/256))= +5.93762e-001
+data8 0x3fe30e97e9a8b5cc, 0x3c98ef96bc97cca0 // log(1/frcpa(1+208/256))= +5.95531e-001
+data8 0x3fe32463ebdd34e8, 0x3caef1dc9a56c1bf // log(1/frcpa(1+209/256))= +5.98192e-001
+data8 0x3fe332f4314ad794, 0x3caa4f0ac5d5fa11 // log(1/frcpa(1+210/256))= +5.99970e-001
+data8 0x3fe348d90e7464cc, 0x3cbe7889f0516acd // log(1/frcpa(1+211/256))= +6.02643e-001
+data8 0x3fe35779f8c43d6c, 0x3ca96bbab7245411 // log(1/frcpa(1+212/256))= +6.04428e-001
+data8 0x3fe36621961a6a98, 0x3ca31f32262db9fb // log(1/frcpa(1+213/256))= +6.06217e-001
+data8 0x3fe37c299f3c3668, 0x3cb15c72c107ee29 // log(1/frcpa(1+214/256))= +6.08907e-001
+data8 0x3fe38ae2171976e4, 0x3cba42a2554b2dd4 // log(1/frcpa(1+215/256))= +6.10704e-001
+data8 0x3fe399a157a603e4, 0x3cb99c62286d8919 // log(1/frcpa(1+216/256))= +6.12504e-001
+data8 0x3fe3afccfe77b9d0, 0x3ca11048f96a43bd // log(1/frcpa(1+217/256))= +6.15210e-001
+data8 0x3fe3be9d503533b4, 0x3ca4022f47588c3e // log(1/frcpa(1+218/256))= +6.17018e-001
+data8 0x3fe3cd7480b4a8a0, 0x3cb4ba7afc2dc56a // log(1/frcpa(1+219/256))= +6.18830e-001
+data8 0x3fe3e3c43918f76c, 0x3c859673d064b8ba // log(1/frcpa(1+220/256))= +6.21554e-001
+data8 0x3fe3f2acb27ed6c4, 0x3cb55c6b452a16a8 // log(1/frcpa(1+221/256))= +6.23373e-001
+data8 0x3fe4019c2125ca90, 0x3cb8c367879c5a31 // log(1/frcpa(1+222/256))= +6.25197e-001
+data8 0x3fe4181061389720, 0x3cb2c17a79c5cc6c // log(1/frcpa(1+223/256))= +6.27937e-001
+data8 0x3fe42711518df544, 0x3ca5f38d47012fc5 // log(1/frcpa(1+224/256))= +6.29769e-001
+data8 0x3fe436194e12b6bc, 0x3cb9854d65a9b426 // log(1/frcpa(1+225/256))= +6.31604e-001
+data8 0x3fe445285d68ea68, 0x3ca3ff9b3a81cd81 // log(1/frcpa(1+226/256))= +6.33442e-001
+data8 0x3fe45bcc464c8938, 0x3cb0a2d8011a6c05 // log(1/frcpa(1+227/256))= +6.36206e-001
+data8 0x3fe46aed21f117fc, 0x3c8a2be41f8e9f3d // log(1/frcpa(1+228/256))= +6.38053e-001
+data8 0x3fe47a1527e8a2d0, 0x3cba4a83594fab09 // log(1/frcpa(1+229/256))= +6.39903e-001
+data8 0x3fe489445efffcc8, 0x3cbf306a23dcbcde // log(1/frcpa(1+230/256))= +6.41756e-001
+data8 0x3fe4a018bcb69834, 0x3ca46c9285029fd1 // log(1/frcpa(1+231/256))= +6.44543e-001
+data8 0x3fe4af5a0c9d65d4, 0x3cbbc1db897580e3 // log(1/frcpa(1+232/256))= +6.46405e-001
+data8 0x3fe4bea2a5bdbe84, 0x3cb84d880d7ef775 // log(1/frcpa(1+233/256))= +6.48271e-001
+data8 0x3fe4cdf28f10ac44, 0x3cb3ec4b7893ce1f // log(1/frcpa(1+234/256))= +6.50140e-001
+data8 0x3fe4dd49cf994058, 0x3c897224d59d3408 // log(1/frcpa(1+235/256))= +6.52013e-001
+data8 0x3fe4eca86e64a680, 0x3cbccf620f24f0cd // log(1/frcpa(1+236/256))= +6.53889e-001
+data8 0x3fe503c43cd8eb68, 0x3c3f872c65971084 // log(1/frcpa(1+237/256))= +6.56710e-001
+data8 0x3fe513356667fc54, 0x3cb9ca64cc3d52c8 // log(1/frcpa(1+238/256))= +6.58595e-001
+data8 0x3fe522ae0738a3d4, 0x3cbe708164c75968 // log(1/frcpa(1+239/256))= +6.60483e-001
+data8 0x3fe5322e26867854, 0x3cb9988ba4aea615 // log(1/frcpa(1+240/256))= +6.62376e-001
+data8 0x3fe541b5cb979808, 0x3ca1662e3a6b95f5 // log(1/frcpa(1+241/256))= +6.64271e-001
+data8 0x3fe55144fdbcbd60, 0x3cb3acd4ca45c1e0 // log(1/frcpa(1+242/256))= +6.66171e-001
+data8 0x3fe560dbc45153c4, 0x3cb4988947959fed // log(1/frcpa(1+243/256))= +6.68074e-001
+data8 0x3fe5707a26bb8c64, 0x3cb3017fe6607ba9 // log(1/frcpa(1+244/256))= +6.69980e-001
+data8 0x3fe587f60ed5b8fc, 0x3cbe7a3266366ed4 // log(1/frcpa(1+245/256))= +6.72847e-001
+data8 0x3fe597a7977c8f30, 0x3ca1e12b9959a90e // log(1/frcpa(1+246/256))= +6.74763e-001
+data8 0x3fe5a760d634bb88, 0x3cb7c365e53d9602 // log(1/frcpa(1+247/256))= +6.76682e-001
+data8 0x3fe5b721d295f10c, 0x3cb716c2551ccbf0 // log(1/frcpa(1+248/256))= +6.78605e-001
+data8 0x3fe5c6ea94431ef8, 0x3ca02b2ed0e28261 // log(1/frcpa(1+249/256))= +6.80532e-001
+data8 0x3fe5d6bb22ea86f4, 0x3caf43a8bbb2f974 // log(1/frcpa(1+250/256))= +6.82462e-001
+data8 0x3fe5e6938645d38c, 0x3cbcedc98821b333 // log(1/frcpa(1+251/256))= +6.84397e-001
+data8 0x3fe5f673c61a2ed0, 0x3caa385eef5f2789 // log(1/frcpa(1+252/256))= +6.86335e-001
+data8 0x3fe6065bea385924, 0x3cb11624f165c5b4 // log(1/frcpa(1+253/256))= +6.88276e-001
+data8 0x3fe6164bfa7cc068, 0x3cbad884f87073fa // log(1/frcpa(1+254/256))= +6.90222e-001
+data8 0x3fe62643fecf9740, 0x3cb78c51da12f4df // log(1/frcpa(1+255/256))= +6.92171e-001
+LOCAL_OBJECT_END(pow_Tt)
+
+
+// Table 1 is T1 = 2^(index_1/128), one 16-byte (significand, exponent) entry
+// for each index_1 from 0 to 15; index_1 is taken from the low 4 bits of N
+LOCAL_OBJECT_START(pow_tbl1)
+data8 0x8000000000000000 , 0x00003FFF // index_1 = 0
+data8 0x80B1ED4FD999AB6C , 0x00003FFF // index_1 = 1
+data8 0x8164D1F3BC030773 , 0x00003FFF // index_1 = 2
+data8 0x8218AF4373FC25EC , 0x00003FFF // index_1 = 3
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF // index_1 = 4
+data8 0x8383594EEFB6EE37 , 0x00003FFF // index_1 = 5
+data8 0x843A28C3ACDE4046 , 0x00003FFF // index_1 = 6
+data8 0x84F1F656379C1A29 , 0x00003FFF // index_1 = 7
+data8 0x85AAC367CC487B15 , 0x00003FFF // index_1 = 8
+data8 0x8664915B923FBA04 , 0x00003FFF // index_1 = 9
+data8 0x871F61969E8D1010 , 0x00003FFF // index_1 = 10
+data8 0x87DB357FF698D792 , 0x00003FFF // index_1 = 11
+data8 0x88980E8092DA8527 , 0x00003FFF // index_1 = 12
+data8 0x8955EE03618E5FDD , 0x00003FFF // index_1 = 13
+data8 0x8A14D575496EFD9A , 0x00003FFF // index_1 = 14
+data8 0x8AD4C6452C728924 , 0x00003FFF // index_1 = 15
+LOCAL_OBJECT_END(pow_tbl1)
+
+
+// Table 2 is T2 = 2^(index_2/8), one 16-byte (significand, exponent) entry
+// for each index_2 from 0 to 7; index_2 is taken from bits 4..6 of N
+LOCAL_OBJECT_START(pow_tbl2)
+data8 0x8000000000000000 , 0x00003FFF // index_2 = 0
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF // index_2 = 1
+data8 0x9837F0518DB8A96F , 0x00003FFF // index_2 = 2
+data8 0xA5FED6A9B15138EA , 0x00003FFF // index_2 = 3
+data8 0xB504F333F9DE6484 , 0x00003FFF // index_2 = 4
+data8 0xC5672A115506DADD , 0x00003FFF // index_2 = 5
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF // index_2 = 6
+data8 0xEAC0C6E7DD24392F , 0x00003FFF // index_2 = 7
+LOCAL_OBJECT_END(pow_tbl2)
+
+.section .text
+GLOBAL_IEEE754_ENTRY(pow)
+
+// Entry: x is in f8, y is in f9. Get exponent of x, used to calculate K.
+{ .mfi
+ getf.exp pow_GR_signexp_X = f8
+ fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
+ mov pow_GR_17ones = 0x1FFFF // Mask for the exponent bits of getf.exp results
+}
+{ .mfi
+ addl pow_AD_P = @ltoff(pow_table_P), gp // Linkage-table slot of pow_table_P
+ fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
+ nop.i 999
+;;
+}
+
+// Get significand of x. Will be used to get index to fetch T, Tt.
+{ .mfi
+ getf.sig pow_GR_sig_X = f8
+ frcpa.s1 POW_B, p6 = f1,f8 // B = frcpa approximation of 1/x
+ nop.i 999
+}
+{ .mfi
+ ld8 pow_AD_P = [pow_AD_P] // pow_AD_P = address of pow_table_P
+ fma.s1 POW_NORM_X = f8,f1,f0 // Normalized copy of x
+ mov pow_GR_exp_2tom8 = 0xFFF7 // Biased exponent of 2^-8
+}
+;;
+
+// p13 = TRUE ==> X is unorm
+// DOUBLE 0x10033 = biased exponent above which y is always an even integer
+{ .mfi
+ nop.m 999
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ addl pow_GR_10033 = 0x10033, r0
+}
+{ .mfi
+ mov pow_GR_16ones = 0xFFFF // Exponent bias
+ fma.s1 POW_NORM_Y = f9,f1,f0 // Normalized copy of y
+ nop.i 999
+}
+;;
+
+// p14 = TRUE ==> X is ZERO
+{ .mfi
+ adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P // Address of table pow_Tt
+ fclass.m p14,p0 = f8, 0x07
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones // Strip the sign bit
+}
+{ .mfi
+ adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P // Address of pow_table_Q
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe POW_P5 = [pow_AD_P], 16
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ nop.i 999
+}
+{ .mib
+ ldfe POW_P4 = [pow_AD_Q], 16
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones // Unbiased exponent of x
+(p13) br.cond.spnt POW_X_DENORM // Branch if x unorm
+}
+;;
+
+// Continue normal and denormal paths here
+POW_COMMON:
+// p11 = TRUE ==> Y is a NAN
+{ .mfi
+ ldfe POW_P3 = [pow_AD_P], 16
+ fclass.m p11,p0 = f9, 0xc3
+ nop.i 999
+}
+{ .mfi
+ ldfe POW_P2 = [pow_AD_Q], 16
+ nop.f 999
+ mov pow_GR_y_zero = 0 // Cleared here, set to 1 below if y is zero
+}
+;;
+
+// Note POW_Xm1 and POW_r1 are used interchangeably
+{ .mfi
+ alloc r32=ar.pfs,2,19,4,0
+ fms.s1 POW_r = POW_B, POW_NORM_X,f1 // r = B*x - 1
+ nop.i 999
+}
+{ .mfi
+ setf.sig POW_int_K = pow_GR_true_exp_X // K = unbiased exponent of x (integer)
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0 // If x<0, Xm1 = -(x+1) = |x|-1
+ nop.i 999
+}
+;;
+
+// p12 = TRUE if Y is ZERO
+// Compute xsq to decide later if |x|=1
+{ .mfi
+ ldfe POW_P1 = [pow_AD_P], 16
+ fclass.m p12,p0 = f9, 0x07
+ shl pow_GR_offset = pow_GR_sig_X, 1 // Shift out the top significand bit
+}
+{ .mfb
+ ldfe POW_P0 = [pow_AD_Q], 16
+ fma.s1 POW_xsq = POW_NORM_X, POW_NORM_X, f0
+(p11) br.cond.spnt POW_Y_NAN // Branch if y=nan
+}
+;;
+
+// Get exponent of |x|-1 to use in comparison to 2^-8
+{ .mfi
+ getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y // int_Y = y converted to a signed integer
+ shr.u pow_GR_offset = pow_GR_offset,56 // Keep the next 8 bits as table index
+}
+;;
+
+// p11 = TRUE ==> X is a NAN
+{ .mfi
+ ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
+ fclass.m p11,p0 = f8, 0xc3
+ shladd pow_AD_Tt = pow_GR_offset, 4, pow_AD_Tt // Entries are 16 bytes apart
+}
+{ .mfi
+ ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
+ fma.s1 POW_delta = f0,f0,f0 // delta=0 in case |x| near 1
+(p12) mov pow_GR_y_zero = 1 // Remember that y is zero
+}
+;;
+
+{ .mfi
+ ldfpd POW_Q2, POW_Q3 = [pow_AD_P], 16
+ fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1
+ and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
+}
+;;
+
+// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
+{ .mfi
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+ nop.f 999
+ cmp.lt p6,p7 = pow_GR_exp_Xm1, pow_GR_exp_2tom8
+}
+{ .mfb
+ ldfpd POW_T, POW_Tt = [pow_AD_Tt], 16
+ fma.s1 POW_rsq = POW_r, POW_r,f0
+(p11) br.cond.spnt POW_X_NAN // Branch if x=nan and y not nan
+}
+;;
+
+// If on the x near 1 path, assign r1 to r and r1*r1 to rsq
+{ .mfi
+ ldfpd POW_Q0_half, POW_Q1 = [pow_AD_P], 16
+(p6) fma.s1 POW_r = POW_r1, f1, f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+(p14) br.cond.spnt POW_X_0 // Branch if x zero and y not nan
+}
+;;
+
+{ .mfi
+ ldfpd POW_Q4, POW_RSHF = [pow_AD_P], 16
+(p7) fma.s1 POW_v6 = POW_r, POW_P5, POW_P4
+ nop.i 999
+}
+{ .mfi
+ mov pow_GR_exp_2toM63 = 0xffc0 // Exponent of 2^-63
+(p6) fma.s1 POW_v6 = POW_r1, POW_P5, POW_P4
+ nop.i 999
+}
+;;
+
+{ .mfi
+ setf.exp POW_2toM63 = pow_GR_exp_2toM63 // Form 2^-63 for test of q
+(p7) fma.s1 POW_v4 = POW_P3, POW_r, POW_P2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fma.s1 POW_v4 = POW_P3, POW_r1, POW_P2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_K = POW_int_K // K as a floating-point value
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y
+ fnma.s1 POW_twoV = POW_NORM_Y, POW_rsq,f0 // twoV = -y*rsq
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0 // U = y*r
+(p12) br.cond.spnt POW_Y_0 // Branch if y=zero, x not zero or nan
+}
+;;
+
+// p11 = TRUE ==> X is NEGATIVE but not inf
+{ .mfi
+ ldfe POW_log2_by_128_lo = [pow_AD_P], 16
+ fclass.m p11,p0 = f8, 0x1a
+ nop.i 999
+}
+{ .mfi
+ ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
+ fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_float_int_Y = POW_int_Y // int_Y converted back to floating-point
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_v6, POW_rsq, POW_v4
+ adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, POW_Tt
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+ adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fms.s1 POW_e2 = POW_NORM_Y, POW_r, POW_U
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z2 = POW_twoV, POW_Q0_half, POW_U
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_p = POW_rsq, POW_v3, POW_v2
+ nop.i 999
+}
+;;
+
+// p11 = TRUE ==> X is NEGATIVE but not inf
+// p12 = TRUE ==> X is NEGATIVE AND Y already even int
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z1 = POW_NORM_Y, POW_G, f0
+(p11) cmp.gt.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Gpr = POW_G, f1, POW_r
+ nop.i 999
+}
+;;
+
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W2 = POW_Z2, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fms.s1 POW_UmZ2 = POW_U, f1, POW_Z2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_GY_Z2 = POW_G, POW_NORM_Y, POW_Z2
+ nop.i 999
+}
+;;
+
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fms.s1 POW_e1 = POW_NORM_Y, POW_G, POW_Z1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W1 = POW_Z1, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+;;
+
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+// p10 = TRUE ==> X is NEG and Y is an int
+// p12 = TRUE ==> X is NEG and Y is not an int
+{ .mfi
+ nop.m 999
+(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ mov pow_GR_xneg_yodd = 0 // Cleared here, set to 1 below if x<0 and y odd
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
+ nop.i 999
+}
+;;
+
+// By subtracting RSHF we get rounded integer POW_N2float
+{ .mfi
+ nop.m 999
+ fms.s1 POW_N2float = POW_W2, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_UmZ2pV = POW_twoV,POW_Q0_half,POW_UmZ2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v4 = POW_Z3, POW_Q3, POW_Q2
+ nop.i 999
+}
+;;
+
+// Extract rounded integer from rightmost significand of POW_W2
+// By subtracting RSHF we get rounded integer POW_N1float
+{ .mfi
+ getf.sig pow_GR_int_W2 = POW_W2
+ fms.s1 POW_N1float = POW_W1, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_Z3, POW_Q1, POW_Q0_half
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fnma.s1 POW_s2 = POW_N2float, POW_log2_by_128_hi, POW_Z2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e2 = POW_e2,f1,POW_UmZ2pV
+ nop.i 999
+}
+;;
+
+// Extract rounded integer from rightmost significand of POW_W1
+// Test if x inf
+{ .mfi
+ getf.sig pow_GR_int_W1 = POW_W1
+ fclass.m p15,p0 = POW_NORM_X, 0x23
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_f2 = POW_N2float, POW_log2_by_128_lo, f1
+(p12) br.cond.spnt POW_X_NEG_Y_NONINT // Branch if x neg, y not integer
+}
+;;
+
+// p11 = TRUE ==> X is +1.0
+// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
+{ .mfi
+ getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
+ fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
+(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_Z3sq, POW_Q4, POW_v4
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fnma.s1 POW_f1 = POW_N1float, POW_log2_by_128_lo, f1
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_s1 = POW_N1float, POW_log2_by_128_hi, POW_Z1
+(p15) br.cond.spnt POW_X_INF // Branch if x is inf
+}
+;;
+
+// Test x and y and flag denormal
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p15,p0 = f8,f9
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_pYrcub_e3 = POW_p, POW_Yrcub, POW_e3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_e12 = POW_e1,f1,POW_e2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ add pow_GR_int_N = pow_GR_int_W1, pow_GR_int_W2 // N = sum of rounded integers
+(p11) fma.d.s0 f8 = f1,f1,f0 // If x=1, result is +1
+ nop.i 999
+}
+{ .mib
+(p12) mov pow_GR_xneg_yodd = 1 // Record x<0 with y an odd integer
+ nop.i 999
+(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1
+}
+;;
+
+{ .mfi
+ and pow_GR_index1 = 0x0f, pow_GR_int_N // index1 = low 4 bits of N
+ fma.s1 POW_q = POW_Z3sq, POW_v3, POW_v2
+ shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128
+}
+{ .mib
+ and pow_GR_index2 = 0x70, pow_GR_int_N // Bits 4..6 of N = byte offset in tbl2
+ cmp.eq p6, p0 = pow_GR_xneg_yodd, r0 // p6 unless x<0 with y odd
+(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
+}
+;;
+
+{ .mfi
+ shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1 // 16-byte entries in tbl1
+ fma.s1 POW_s = POW_s1, f1, POW_s2
+ add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M // Biased exponent of 2^M
+}
+{ .mfi
+ add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+ fma.s1 POW_f12 = POW_f1, POW_f2,f0
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+{ .mmi
+ ldfe POW_T1 = [pow_AD_T1]
+ ldfe POW_T2 = [pow_AD_T2]
+ sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
+}
+;;
+
+{ .mfi
+ setf.exp POW_2M = pow_int_GR_M // POW_2M = 2^M
+ fma.s1 POW_e123 = POW_e12, f1, POW_e3
+ nop.i 999
+}
+{ .mfb
+(p6) cmp.gt p6, p0 = -11, pow_GR_true_exp_Y_Gpr
+ fma.s1 POW_d = POW_GY_Z2, f1, POW_pYrcub_e3
+(p6) br.cond.spnt POW_NEAR_ONE // branch if |y*log(x)| < 2^(-11)
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+ nop.i 999
+}
+;;
+
+// p8 TRUE ==> |Y(G + r)| >= 2^10
+
+// double
+// -2^10 -2^9 2^9 2^10
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+
+// Form signexp of constants to indicate overflow
+{ .mfi
+ mov pow_GR_big_pos = 0x103ff // signexp of +2^1024
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ cmp.le p8,p9 = 10, pow_GR_true_exp_Y_Gpr // p8 if exponent of Y(G+r) >= 10
+}
+{ .mfi
+ mov pow_GR_big_neg = 0x303ff // signexp of -2^1024
+ fma.s1 POW_v4 = POW_s, POW_Q3, POW_Q2
+ andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+// Form big positive and negative constants to test for possible overflow
+{ .mfi
+ setf.exp POW_big_pos = pow_GR_big_pos
+ fma.s1 POW_v2 = POW_s, POW_Q1, POW_Q0_half
+(p9) cmp.le.unc p0,p10 = 9, pow_GR_true_exp_Y_Gpr // p10 if |Y(G+r)| < 2^9
+}
+{ .mfb
+ setf.exp POW_big_neg = pow_GR_big_neg
+ fma.s1 POW_1ps = f1,f1,POW_s
+(p8) br.cond.spnt POW_OVER_UNDER_X_NOT_INF
+}
+;;
+
+// f123 = f12*(e123+1) = f12*e123+f12
+{ .mfi
+ nop.m 999
+ fma.s1 POW_f123 = POW_e123,POW_f12,POW_f12
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v3 = POW_ssq, POW_Q4, POW_v4
+ cmp.ne p12,p13 = pow_GR_xneg_yodd, r0 // p12 if x<0 and y odd, else p13
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v21ps = POW_ssq, POW_v2, POW_1ps
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_s4 = POW_ssq, POW_ssq, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 POW_A = POW_2M, POW_f123, f0 // Negated when x<0 and y odd
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fma.s1 POW_A = POW_2M, POW_f123, f0
+ cmp.eq p14,p11 = r0,r0 // Initialize p14 on, p11 off
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmerge.s POW_abs_q = f0, POW_q // Form |q| so can test its size
+ nop.i 999
+}
+;;
+
+{ .mfi
+(p10) cmp.eq p0,p14 = r0,r0 // Turn off p14 if no overflow
+ fma.s1 POW_es = POW_s4, POW_v3, POW_v21ps
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_A, POW_T1T2, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+// Test for |q| < 2^-63. If so then reverse last two steps of the result
+// to avoid monotonicity problems for results near 1.0 in round up/down/zero.
+// p11 will be set if need to reverse the order, p14 if not.
+ nop.m 999
+(p10) fcmp.lt.s0 p11,p14 = POW_abs_q, POW_2toM63 // Test |q| <2^-63
+ nop.i 999
+}
+;;
+
+.pred.rel "mutex",p11,p14
+{ .mfi
+ nop.m 999
+(p14) fma.s1 POW_A = POW_A, POW_es, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p11) fma.s1 POW_A = POW_A, POW_q, POW_A
+ nop.i 999
+}
+;;
+
+// Dummy op to set inexact if |q| < 2^-63
+{ .mfi
+ nop.m 999
+(p11) fma.d.s0 POW_tmp = POW_A, POW_q, POW_A
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fma.d.s0 f8 = POW_A, POW_q, POW_A // Normal order: result = A + A*q
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p11) fma.d.s0 f8 = POW_A, POW_es, f0 // Reversed order: result = A*es
+(p10) br.ret.sptk b0 // Exit main branch if no over/underflow
+}
+;;
+
+// POSSIBLE_OVER_UNDER
+// p6 = TRUE ==> Y_Gpr negative
+// Result is already computed. We just need to know if over/underflow occurred.
+
+{ .mfb
+ cmp.eq p0,p6 = pow_GR_sign_Y_Gpr, r0
+ nop.f 999
+(p6) br.cond.spnt POW_POSSIBLE_UNDER
+}
+;;
+
+// POSSIBLE_OVER
+// We got an answer.
+// overflow is a possibility, not a certainty
+
+
+// We define an overflow when the answer computed with
+// WRE set
+// user-defined rounding mode
+// has magnitude >= 2^1024 (compared against POW_big_pos and POW_big_neg)
+// double
+// Largest double is 7FE (biased double)
+// 7FE - 3FF + FFFF = 103FE
+// Create + largest_double_plus_ulp
+// Create - largest_double_plus_ulp
+// Calculate answer with WRE set.
+
+// single
+// Largest single is FE (biased single)
+// FE - 7F + FFFF = 1007E
+// Create + largest_single_plus_ulp
+// Create - largest_single_plus_ulp
+// Calculate answer with WRE set.
+
+// Cases when answer is ldn+1 are as follows:
+// ldn ldn+1
+// --+----------|----------+------------
+// |
+// +inf +inf -inf
+// RN RN
+// RZ
+
+// Put in s2 (td set, wre set)
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x42
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.d.s2 POW_wre_urm_f8 = POW_A, POW_q, POW_A // Recompute result under s2
+ nop.i 999
+}
+;;
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+// p7 = TRUE ==> yes, we have an overflow
+{ .mfi
+ nop.m 999
+ fcmp.ge.s1 p7, p8 = POW_wre_urm_f8, POW_big_pos
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.le.s1 p7, p0 = POW_wre_urm_f8, POW_big_neg // Also test negative side
+ nop.i 999
+}
+;;
+
+{ .mbb
+(p7) mov pow_GR_tag = 24 // Error tag used for overflow
+(p7) br.cond.spnt __libm_error_region // Branch if overflow
+ br.ret.sptk b0 // Exit if did not overflow
+}
+;;
+
+// Here if |y*log(x)| < 2^(-11)
+// pow(x,y) ~ exp(d) ~ 1 + d + 0.5*d^2 + Q1*d^3 + Q2*d^4, where d = y*log(x)
+.align 32
+POW_NEAR_ONE:
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_d2 = POW_d, POW_d, f0 // d2 = d^2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_poly_d_hi = POW_d, POW_Q0_half, f1 // hi = 1 + Q0_half*d
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_poly_d_lo = POW_d, POW_Q2, POW_Q1 // lo = Q1 + Q2*d
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_poly_d = POW_d2, POW_poly_d_lo, POW_poly_d_hi // hi + d^2*lo
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fma.d.s0 f8 = POW_d, POW_poly_d, f1 // Result = 1 + d*poly_d
+ br.ret.sptk b0 // exit function for arguments |y*log(x)| < 2^(-11)
+}
+;;
+
+POW_POSSIBLE_UNDER:
+// We got an answer. input was < -2^9 but > -2^10 (double)
+// We got an answer. input was < -2^6 but > -2^7 (float)
+// underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+// Put in s2 (td set, ftz set)
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x41
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.d.s2 POW_ftz_urm_f8 = POW_A, POW_q, POW_A // Recompute result under s2
+ nop.i 999
+}
+;;
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+// p7 = TRUE ==> yes, we have an underflow
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p7, p0 = POW_ftz_urm_f8, f0 // p7 if the ftz result is zero
+ nop.i 999
+}
+;;
+
+{ .mbb
+(p7) mov pow_GR_tag = 25 // Error tag used for underflow
+(p7) br.cond.spnt __libm_error_region // Branch if underflow
+ br.ret.sptk b0 // Exit if did not underflow
+}
+;;
+
+POW_X_DENORM:
+// Here if x unorm. Use the NORM_X for getf instructions, and then back
+// to normal path
+{ .mfi
+ getf.exp pow_GR_signexp_X = POW_NORM_X // Redo exponent fetch from NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+ getf.sig pow_GR_sig_X = POW_NORM_X // Redo significand fetch from NORM_X
+;;
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones // Strip the sign bit
+ nop.i 999
+}
+;;
+
+{ .mib
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones // Unbiased exponent of x
+ nop.i 999
+ br.cond.sptk POW_COMMON // Rejoin the common path
+}
+;;
+
+POW_X_0:
+// Here if x=0 and y not nan
+// y=0 and y<0 branch away; for y>0 the result is +0, or x if y is an odd int
+// We have the following cases:
+// p6 x=0 and y>0 and is an integer (may be even or odd)
+// p7 x=0 and y>0 and is NOT an integer, return +0
+// p8 x=0 and y>0 and so big as to always be an even integer, return +0
+// p9 x=0 and y>0 and may not be integer
+// p10 x=0 and y>0 and is an odd integer, return x
+// p11 x=0 and y>0 and is an even integer, return +0
+// p12 used in dummy fcmp to set denormal flag if y=unorm
+// p13 x=0 and y>0
+// p14 x=0 and y=0, branch to code for calling error handling
+// p15 x=0 and y<0, branch to code for calling error handling
+//
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
+ fcmp.lt.s1 p15,p13 = f9, f0 // Test for y<0
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ cmp.ne p14,p0 = pow_GR_y_zero,r0 // Test for y=0
+ fcvt.xf POW_float_int_Y = POW_int_Y
+(p14) br.cond.spnt POW_X_0_Y_0 // Branch if x=0 and y=0
+}
+;;
+
+// If x=0 and y>0, test y and flag denormal
+{ .mfb
+(p13) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
+(p13) fcmp.eq.s0 p12,p0 = f9,f0 // If x=0, y>0 dummy op to flag denormal
+(p15) br.cond.spnt POW_X_0_Y_NEG // Branch if x=0 and y<0
+}
+;;
+
+// Here if x=0 and y>0
+{ .mfi
+ nop.m 999
+(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y // Test y=int
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.d.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.d.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y>0 and not integer
+(p6) tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
+}
+;;
+
+// Note if x=0, y>0 and odd integer, just return x
+{ .mfb
+ nop.m 999
+(p11) fma.d.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y even integer
+ br.ret.sptk b0 // Exit if x=0 and y>0
+}
+;;
+
+POW_X_0_Y_0:
+// When X is +-0 and Y is +-0, IEEE returns 1.0
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 26 // Error tag used for x=0, y=0
+ fma.d.s0 f8 = f1,f1,f0 // Result +1.0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+POW_X_0_Y_NEG:
+// When X is +-0 and Y is negative, IEEE returns
+// X Y answer
+// +0 -odd int +inf
+// -0 -odd int -inf
+
+// +0 !-odd int +inf
+// -0 !-odd int +inf
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// return (sign_of_x)inf
+// p12 even
+// return +inf
+// p10 == Y is not an integer
+// return +inf
+// Every case leaves through __libm_error_region with tag 27.
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+;;
+
+{ .mfi
+ mov pow_GR_tag = 27 // Error tag used for x=0, y<0
+(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p6) frcpa.s0 f8,p13 = f1, f0 // f8 = 1/(+0) = +inf
+(p6) br.cond.sptk __libm_error_region // x=0, y<0, y large neg int
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p10) frcpa.s0 f8,p13 = f1, f0 // f8 = 1/(+0) = +inf
+(p10) br.cond.sptk __libm_error_region // x=0, y<0, y not int
+}
+;;
+
+// x=0, y<0, y an int
+{ .mib
+ nop.m 999
+(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0 // p11 if y odd, p12 if y even
+ nop.b 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s0 f8,p13 = f1,f0 // y even: f8 = 1/(+0) = +inf
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) frcpa.s0 f8,p13 = f1,f8 // y odd: f8 = 1/x, so inf takes the sign of x
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+POW_Y_0:
+// Here for y zero, x anything but zero and nan
+// Set flag if x denormal
+// Result is +1.0
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag if x denormal
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.d.s0 f8 = f1,f1,f0 // Result +1.0
+ br.ret.sptk b0 // Exit for y=0
+}
+;;
+
+
+POW_X_INF:
+// Here when X is +-inf
+
+// X +inf Y +inf +inf
+// X -inf Y +inf +inf
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf <== (-inf)^0.5 = +inf !!
+// X -inf Y >0, odd integer -inf
+
+// X +inf Y -inf +0
+// X -inf Y -inf +0
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X + inf Y=+0 +1
+// X + inf Y=-0 +1
+// X - inf Y=+0 +1
+// X - inf Y=-0 +1
+
+// p13 == Y negative
+// p14 == Y positive
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// p13 == (Y negative)
+// return (sign_of_x)0
+// p14 == (Y positive)
+// return (sign_of_x)inf
+// pxx even
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+
+// pxx == Y is not an integer
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+//
+
+// If x=inf, test y and flag denormal
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p10,p11 = f9,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.lt.s0 p13,p14 = POW_NORM_Y,f0 // p13 if y<0, else p14
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033 // p7 if y may be an odd integer
+}
+{ .mfi
+ nop.m 999
+ fclass.m p12,p0 = f9, 0x23 //@inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p15,p0 = f9, 0x07 //@zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p15) fmerge.s f8 = f1,f1 // Return +1.0 if x=inf, y=0
+(p15) br.ret.spnt b0 // Exit if x=inf, y=0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) frcpa.s1 f8,p10 = f1,f0 // If x=inf, y>0, assume result +inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p13) fma.d.s0 f8 = f0,f0,f0 // If x=inf, y<0, assume result +0.0
+(p12) br.ret.spnt b0 // Exit if x=inf, y=inf
+}
+;;
+
+// Here if x=inf, and 0 < |y| < inf. Need to correct results if y odd integer.
+{ .mfi
+ nop.m 999
+(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y // Is y integer?
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0 // Test for y odd integer
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fmerge.s f8 = POW_NORM_X,f8 // If y odd integer use sign of x
+ br.ret.sptk b0 // Exit for x=inf, 0 < |y| < inf
+}
+;;
+
+
+POW_X_NEG_Y_NONINT:
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 28 // Error tag used for x<0, y not an integer
+ frcpa.s0 f8,p6 = f0,f0 // f8 = 0/0, the qnan indefinite
+ br.cond.sptk __libm_error_region
+}
+;;
+
+POW_X_NAN:
+// Here if x=nan, y not nan
+{ .mfi
+ nop.m 999
+ fclass.m p9,p13 = f9, 0x07 // Test y=zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p13) fma.d.s0 f8 = f8,f1,f0 // y nonzero: result is x*1, i.e. the nan
+(p13) br.ret.sptk b0 // Exit if x nan, y anything but zero or nan
+}
+;;
+
+POW_X_NAN_Y_0:
+// When X is a NAN and Y is zero, IEEE returns 1.
+// We call error support with this value. Reached by falling through above.
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Dummy op to set invalid on snan
+ nop.i 999
+}
+{ .mfb
+ mov pow_GR_tag = 29 // Error tag used for x=nan, y=0
+ fma.d.s0 f8 = f0,f0,f1 // Result +1.0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+POW_OVER_UNDER_X_NOT_INF:
+
+// p8 is TRUE for overflow
+// p9 is TRUE for underflow
+
+// if y is infinity, we should not over/underflow
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p14, p13 = POW_xsq,f1 // Test |x|=1
+ cmp.eq p8,p9 = pow_GR_sign_Y_Gpr, r0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fclass.m.unc p15, p0 = f9, 0x23 // If |x|=1, test y=inf
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fclass.m.unc p11,p0 = f9, 0x23 // If |x| not 1, test y=inf
+ nop.i 999
+}
+;;
+
+// p15 = TRUE if |x|=1, y=inf, return +1
+{ .mfb
+ nop.m 999
+(p15) fma.d.s0 f8 = f1,f1,f0 // If |x|=1, y=inf, result +1
+(p15) br.ret.spnt b0 // Exit if |x|=1, y=inf
+}
+;;
+
+.pred.rel "mutex",p8,p9
+{ .mfb
+(p8) setf.exp f8 = pow_GR_17ones // If exp(+big), result inf
+(p9) fmerge.s f8 = f0,f0 // If exp(-big), result 0
+(p11) br.ret.sptk b0 // Exit if |x| not 1, y=inf
+}
+;;
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.cond.sptk POW_OVER_UNDER_ERROR // Branch if y not inf
+}
+;;
+
+
+POW_Y_NAN:
+// Here if y=nan, x anything
+// If x = +1 then result is +1, else result is quiet Y
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fcmp.eq.s0 p6,p0 = f9,f1 // Set invalid, even if x=+1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fma.d.s0 f8 = f1,f1,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8 = f9,f8,f0
+ br.ret.sptk b0 // Exit y=nan
+}
+;;
+
+
+POW_OVER_UNDER_ERROR:
+// Here if we have overflow or underflow.
+// Enter with p12 true if x negative and y odd int to force -0 or -inf
+
+{ .mfi
+ sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
+ nop.f 999
+ mov pow_GR_one = 0x1
+}
+;;
+
+// overflow, force inf with O flag
+{ .mmb
+(p8) mov pow_GR_tag = 24
+(p8) setf.exp POW_tmp = pow_GR_17ones_m1
+ nop.b 999
+}
+;;
+
+// underflow, force zero with I, U flags
+{ .mmi
+(p9) mov pow_GR_tag = 25
+(p9) setf.exp POW_tmp = pow_GR_one
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.d.s0 f8 = POW_tmp, POW_tmp, f0
+ nop.i 999
+}
+;;
+
+// p12 x is negative and y is an odd integer, change sign of result
+{ .mfi
+ nop.m 999
+(p12) fnma.d.s0 f8 = POW_tmp, POW_tmp, f0
+ nop.i 999
+}
+;;
+
+GLOBAL_IEEE754_END(pow)
+libm_alias_double_other (__pow, pow)
+#ifdef SHARED
+.symver pow,pow@@GLIBC_2.29
+.weak __pow_compat
+.set __pow_compat,__pow
+.symver __pow_compat,pow@GLIBC_2.2
+#endif
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+// Error exit for pow: spill y, x and the tentative result (f8) to a 64-byte frame, call __libm_error_support, reload f8, return.
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 (y) slot: entry sp - 32, i.e. new sp + 32 (sp is lowered in this same group)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate the 64-byte frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfd [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 (y) at sp + 32; post-increment leaves the pointer at sp + 48
+ add GR_Parameter_X = 16,sp // Parameter 1 (x) address: sp + 16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0 (return address) before the call below overwrites it
+};;
+
+.body
+{ .mib
+ stfd [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 (x) at sp + 16
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address: sp + 48
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 (tentative result) at sp + 48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point GR_Parameter_Y back at the y slot (sp + 32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function; NOTE(review): presumably also consumes pow_GR_tag set by the caller paths - confirm
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute the result slot address after the call instead of reusing the pre-call value
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return to pow's caller with the result in f8
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_pow_log_data.c b/sysdeps/ia64/fpu/e_pow_log_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_pow_log_data.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_powf.S b/sysdeps/ia64/fpu/e_powf.S
new file mode 100644
index 0000000000..593e4b5162
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_powf.S
@@ -0,0 +1,2071 @@
+.file "powf.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 02/03/00 Added p12 to definite over/under path. With odd power we did not
+// maintain the sign of x in this path.
+// 04/04/00 Unwind support added
+// 04/19/00 pow(+-1,inf) now returns NaN
+// pow(+-val, +-inf) returns 0 or inf, but now does not call error
+// support
+// Added s1 to fcvt.fx because invalid flag was incorrectly set.
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 09/07/00 Improved performance by eliminating bank conflicts and other stalls,
+// and tweaking the critical path
+// 09/08/00 Per c99, pow(+-1,inf) now returns 1, and pow(+1,nan) returns 1
+// 09/28/00 Updated NaN**0 path
+// 01/20/01 Fixed denormal flag settings.
+// 02/13/01 Improved speed.
+// 03/19/01 Reordered exp polynomial to improve speed and eliminate monotonicity
+// problem in round up, down, and to zero modes. Also corrected
+// overflow result when x negative, y odd in round up, down, zero.
+// 06/14/01 Added brace missing from bundle
+// 12/10/01 Corrected case where x negative, 2^23 <= |y| < 2^24, y odd integer.
+// 02/08/02 Fixed overflow/underflow cases that were not calling error support.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 08/29/02 Improved Itanium 2 performance
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 10/09/03 Modified algorithm to improve performance, reduce table size, and
+// fix boundary case powf(2.0,-150.0)
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//==============================================================
+// float powf(float x, float y)
+//
+// Overview of operation
+//==============================================================
+//
+// Three steps...
+// 1. Log(x)
+// 2. y Log(x)
+// 3. exp(y log(x))
+//
+// This means we work with the absolute value of x and merge in the sign later.
+// Log(x) = G + delta + r -rsq/2 + p
+// G,delta depend on the exponent of x and table entries. The table entries are
+// indexed by the exponent of x, called K.
+//
+// The G and delta come out of the reduction; r is the reduced x.
+//
+// B = frcpa(x)
+// xB-1 is small means that B is the approximate inverse of x.
+//
+// Log(x) = Log( (1/B)(Bx) )
+// = Log(1/B) + Log(Bx)
+// = Log(1/B) + Log( 1 + (Bx-1))
+//
+// x = 2^K 1.x_1x_2.....x_52
+// B = frcpa(x) = 2^-K Cm
+// Log(1/B) = Log(1/(2^-K Cm))
+// Log(1/B) = Log((2^K/ Cm))
+// Log(1/B) = K Log(2) + Log(1/Cm)
+//
+// Log(x) = K Log(2) + Log(1/Cm) + Log( 1 + (Bx-1))
+//
+// If you take the significand of x, set the exponent to true 0, then Cm is
+// the frcpa. We tabulate the Log(1/Cm) values. There are 256 of them.
+// The frcpa table is indexed by 8 bits, the x_1 thru x_8.
+// m = x_1x_2...x_8 is an 8-bit index.
+//
+// Log(1/Cm) = log(1/frcpa(1+m/256)) where m goes from 0 to 255.
+//
+// We tabulate as one double, T for single precision power
+//
+// Log(x) = (K Log(2)_hi + T) + (K Log(2)_lo) + Log( 1 + (Bx-1))
+// Log(x) = G + delta + Log( 1 + (Bx-1))
+//
+// The Log( 1 + (Bx-1)) can be calculated as a series in r = Bx-1.
+//
+// Log( 1 + (Bx-1)) = r - rsq/2 + p
+// where p = r^3(P0 + P1*r + P2*r^2)
+//
+// Then,
+//
+// yLog(x) = yG + y delta + y(r-rsq/2) + yp
+// yLog(x) = Z1 + e3 + Z2 + Z3
+//
+//
+// exp(yLog(x)) = exp(Z1 + Z2) exp(Z3) exp(e3)
+//
+//
+// exp(Z3) is another series.
+// exp(e3) is approximated as f3 = 1 + e3
+//
+// exp(Z1 + Z2) = exp(Z)
+// Z (128/log2) = number of log2/128 in Z is N
+//
+// s = Z - N log2/128
+//
+// exp(Z) = exp(s) exp(N log2/128)
+//
+// exp(r) = exp(Z - N log2/128)
+//
+// r = s + d = (Z - N (log2/128)_hi) -N (log2/128)_lo
+// = Z - N (log2/128)
+//
+// Z = s+d +N (log2/128)
+//
+// exp(Z) = exp(s) (1+d) exp(N log2/128)
+//
+// N = M 128 + n
+//
+// N log2/128 = M log2 + n log2/128
+//
+// n is 7 binary digits = n_7n_6...n_1
+//
+// n log2/128 = n_7n_6n_5 16 log2/128 + n_4n_3n_2n_1 log2/128
+// n log2/128 = n_7n_6n_5 log2/8 + n_4n_3n_2n_1 log2/128
+// n log2/128 = I2 log2/8 + I1 log2/128
+//
+// N log2/128 = M log2 + I2 log2/8 + I1 log2/128
+//
+// exp(Z) = exp(s) (1+d) exp(log(2^M) + log(2^I2/8) + log(2^I1/128))
+// exp(Z) = exp(s) f12 (2^M) 2^I2/8 2^I1/128
+//
+// I1, I2 are table indices. Use a series for exp(s).
+// Then get exp(Z)
+//
+// exp(yLog(x)) = exp(Z) exp(Z3) f3
+// exp(yLog(x)) = exp(Z)f3 exp(Z3)
+// exp(yLog(x)) = A exp(Z3)
+//
+// We actually calculate exp(Z3) -1.
+// Then,
+// exp(yLog(x)) = A + A( exp(Z3) -1)
+//
+
+// Table Generation
+//==============================================================
+
+// The log values
+// ==============
+// The operation (K*log2_hi) must be exact. K is the true exponent of x.
+// If we allow gradual underflow (denormals), K can be represented in 12 bits
+// (as a two's complement number). We assume 13 bits as an engineering
+// precaution.
+//
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+// 0 1 66
+// 2 34
+//
+// So we want the lsb(log2_hi) to be 2^-50
+// We get log2 as a quad-extended (15-bit exponent, 128-bit significand)
+//
+// 0 fffe b17217f7d1cf79ab c9e3b39803f2f6af (4...)
+//
+// Consider numbering the bits left to right, starting at 0 thru 127.
+// Bit 0 is the 2^-1 bit; bit 49 is the 2^-50 bit.
+//
+// ...79ab
+// 0111 1001 1010 1011
+// 44
+// 89
+//
+// So if we shift off the rightmost 14 bits, then (shift back only
+// the top half) we get
+//
+// 0 fffe b17217f7d1cf4000 e6af278ece600fcb dabc000000000000
+//
+// Put the right 64-bit significand in an FR register, convert to double;
+// it is exact. Put the next 128 bits into a quad register and round to double.
+// The true exponent of the low part is -51.
+//
+// hi is 0 fffe b17217f7d1cf4000
+// lo is 0 ffcc e6af278ece601000
+//
+// Convert to double memory format and get
+//
+// hi is 0x3fe62e42fefa39e8
+// lo is 0x3cccd5e4f1d9cc02
+//
+// log2_hi + log2_lo is an accurate value for log2.
+//
+//
+// The T and t values
+// ==================
+// A similar method is used to generate the T and t values.
+//
+// K * log2_hi + T must be exact.
+//
+// Smallest T,t
+// ----------
+// The smallest T,t is
+// T t
+// 0x3f60040155d58800, 0x3c93bce0ce3ddd81 log(1/frcpa(1+0/256))= +1.95503e-003
+//
+// The exponent is 0x3f6 (biased) or -9 (true).
+// For the smallest T value, what we want is to clip the significand such that
+// when it is shifted right by 9, its lsb is in the bit for 2^-51. The 9 is
+// specific to the first entry. In general, it is 0xffff - (biased 15-bit
+// exponent).
+
+// Independently, what we have calculated is the table value as a quad
+// precision number.
+// Table entry 1 is
+// 0 fff6 80200aaeac44ef38 338f77605fdf8000
+//
+// We store this quad precision number in a data structure that is
+// sign: 1
+// exponent: 15
+// significand_hi: 64 (includes explicit bit)
+// significand_lo: 49
+// Because the explicit bit is included, the significand is 113 bits.
+//
+// Consider significand_hi for table entry 1.
+//
+//
+// +-+--- ... -------+--------------------+
+// | |
+// +-+--- ... -------+--------------------+
+// 0 1 4444444455555555556666
+// 2345678901234567890123
+//
+// Labeled as above, bit 0 is 2^0, bit 1 is 2^-1, etc.
+// Bit 42 is 2^-42. If we shift to the right by 9, the bit in
+// bit 42 goes in 51.
+//
+// So what we want to do is shift bits 43 thru 63 into significand_lo.
+// This is shifting bit 42 into bit 63, taking care to retain shifted-off bits.
+// Then shifting (just with significand_hi) back into bit 42.
+//
+// The shift_value is 63-42 = 21. In general, this is
+// 63 - (51 -(0xffff - 0xfff6))
+// For this example, it is
+// 63 - (51 - 9) = 63 - 42 = 21
+//
+// This means we are shifting 21 bits into significand_lo. We must maintain more
+// than a 128-bit significand so as not to lose bits. So before the shift we put
+// the 128-bit significand into a 256-bit significand and then shift.
+// The 256-bit significand has four parts: hh, hl, lh, and ll.
+//
+// Start off with
+// hh hl lh ll
+// <64> <49><15_0> <64_0> <64_0>
+//
+// After shift by 21 (then return for significand_hi),
+// <43><21_0> <21><43> <6><58_0> <64_0>
+//
+// Take the hh part and convert to a double. There is no rounding here.
+// The conversion is exact. The true exponent of the high part is the same as
+// the true exponent of the input quad.
+//
+// We have some 64 plus significand bits for the low part. In this example, we
+// have 70 bits. We want to round this to a double. Put them in a quad and then
+// do a quad fnorm.
+// For this example the true exponent of the low part is
+// true_exponent_of_high - 43 = true_exponent_of_high - (64-21)
+// In general, this is
+// true_exponent_of_high - (64 - shift_value)
+//
+//
+// Largest T,t
+// ----------
+// The largest T,t is
+// 0x3fe62643fecf9742, 0x3c9e3147684bd37d log(1/frcpa(1+255/256))=+6.92171e-001
+//
+// Table entry 256 is
+// 0 fffe b1321ff67cba178c 51da12f4df5a0000
+//
+// The shift value is
+// 63 - (51 -(0xffff - 0xfffe)) = 13
+//
+// The true exponent of the low part is
+// true_exponent_of_high - (64 - shift_value)
+// -1 - (64-13) = -52
+// Biased as a double, this is 0x3cb
+//
+//
+//
+// So then lsb(T) must be >= 2^-51
+// msb(Klog2_hi) <= 2^12
+//
+// +--------+---------+
+// | 51 bits | <== largest T
+// +--------+---------+
+// | 9 bits | 42 bits | <== smallest T
+// +------------+----------------+-+
+// | 13 bits | 50 bits | |
+// +------------+----------------+-+
+//
+// Note: For powf only the table of T is needed
+
+
+// Special Cases
+//==============================================================
+
+// double float
+// overflow error 24 30
+
+// underflow error 25 31
+
+// X zero Y zero
+// +0 +0 +1 error 26 32
+// -0 +0 +1 error 26 32
+// +0 -0 +1 error 26 32
+// -0 -0 +1 error 26 32
+
+// X zero Y negative
+// +0 -odd integer +inf error 27 33 divide-by-zero
+// -0 -odd integer -inf error 27 33 divide-by-zero
+// +0 !-odd integer +inf error 27 33 divide-by-zero
+// -0 !-odd integer +inf error 27 33 divide-by-zero
+// +0 -inf +inf error 27 33 divide-by-zero
+// -0 -inf +inf error 27 33 divide-by-zero
+
+// X zero Y positive
+// +0 +odd integer +0
+// -0 +odd integer -0
+// +0 !+odd integer +0
+// -0 !+odd integer +0
+// +0 +inf +0
+// -0 +inf +0
+// +0 Y NaN quiet Y invalid if Y SNaN
+// -0 Y NaN quiet Y invalid if Y SNaN
+
+// X one
+// -1 Y inf +1
+// -1 Y NaN quiet Y invalid if Y SNaN
+// +1 Y NaN +1 invalid if Y SNaN
+// +1 Y any else +1
+
+// X - Y not integer QNAN error 28 34 invalid
+
+// X NaN Y 0 +1 error 29 35
+// X NaN Y NaN quiet X invalid if X or Y SNaN
+// X NaN Y any else quiet X invalid if X SNaN
+// X !+1 Y NaN quiet Y invalid if Y SNaN
+
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf
+// X -inf Y >0, odd integer -inf
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X +inf Y =0 +1
+// X -inf Y =0 +1
+
+// |X|<1 Y +inf +0
+// |X|<1 Y -inf +inf
+// |X|>1 Y +inf +inf
+// |X|>1 Y -inf +0
+
+// X any Y =0 +1
+
+// Assembly macros
+//==============================================================
+
+// integer registers used
+
+pow_GR_exp_half = r10
+pow_GR_signexp_Xm1 = r11 // r11 is also pow_GR_tmp; the two names must not be live at once
+pow_GR_tmp = r11 // shares r11 with pow_GR_signexp_Xm1
+
+pow_GR_signexp_X = r14
+pow_GR_17ones = r15 // r15 is also pow_GR_Fpsr
+pow_GR_Fpsr = r15 // shares r15 with pow_GR_17ones
+pow_AD_P = r16 // r16 is also pow_GR_rcs0_mask
+pow_GR_rcs0_mask = r16 // shares r16 with pow_AD_P
+pow_GR_exp_2tom8 = r17 // r17 is also pow_GR_rcs0
+pow_GR_rcs0 = r17 // shares r17 with pow_GR_exp_2tom8
+pow_GR_sig_X = r18
+pow_GR_10033 = r19
+pow_GR_16ones = r20
+
+pow_AD_Tt = r21
+pow_GR_exp_X = r22
+pow_AD_Q = r23
+pow_GR_true_exp_X = r24
+pow_GR_y_zero = r25
+
+pow_GR_exp_Y = r26
+pow_AD_tbl1 = r27
+pow_AD_tbl2 = r28
+pow_GR_offset = r29
+pow_GR_exp_Xm1 = r30
+pow_GR_xneg_yodd = r31
+
+pow_GR_int_N = r38
+pow_GR_index1 = r39
+pow_GR_index2 = r40
+
+pow_AD_T1 = r41
+pow_AD_T2 = r42
+pow_int_GR_M = r43
+pow_GR_sig_int_Y = r44
+pow_GR_sign_Y_Gpr = r45
+
+pow_GR_17ones_m1 = r46
+pow_GR_one = r47
+pow_GR_sign_Y = r48
+pow_GR_signexp_Y_Gpr = r49
+pow_GR_exp_Y_Gpr = r50 // r50 is reused as GR_SAVE_B0 below
+
+pow_GR_true_exp_Y_Gpr = r51 // r51 is reused as GR_SAVE_GP below
+pow_GR_signexp_Y = r52 // r52 is reused as GR_SAVE_PFS below
+pow_GR_x_one = r53 // r53 is reused as GR_Parameter_X below
+pow_GR_big_pos = r55 // r55 is reused as GR_Parameter_RESULT below
+
+pow_GR_big_neg = r56 // r56 is reused as pow_GR_tag below
+
+GR_SAVE_B0 = r50 // shares r50 with pow_GR_exp_Y_Gpr
+GR_SAVE_GP = r51 // shares r51 with pow_GR_true_exp_Y_Gpr
+GR_SAVE_PFS = r52 // shares r52 with pow_GR_signexp_Y
+
+GR_Parameter_X = r53 // shares r53 with pow_GR_x_one
+GR_Parameter_Y = r54
+GR_Parameter_RESULT = r55 // shares r55 with pow_GR_big_pos
+pow_GR_tag = r56 // shares r56 with pow_GR_big_neg
+
+
+// floating point registers used
+
+POW_B = f32
+POW_NORM_X = f33
+POW_Xm1 = f34 // f34 is also POW_r1; the two names must not be live at once
+POW_r1 = f34 // shares f34 with POW_Xm1
+
+POW_NORM_Y = f37
+POW_Q2 = f38
+POW_eps = f39
+POW_P2 = f40
+
+POW_P0 = f42
+POW_log2_lo = f43
+POW_r = f44
+POW_Q0_half = f45
+
+POW_tmp = f47
+POW_log2_hi = f48
+POW_Q1 = f49
+POW_P1 = f50
+
+POW_log2_by_128_hi = f51
+POW_inv_log2_by_128 = f52
+POW_rsq = f53
+POW_Yrcub = f54
+POW_log2_by_128_lo = f55
+
+POW_xsq = f57
+POW_v2 = f59
+POW_T = f60
+
+POW_RSHF = f62
+POW_v210 = f63
+POW_twoV = f65
+
+POW_U = f66
+POW_G = f67
+POW_delta = f68
+POW_V = f70
+
+POW_p = f71
+POW_Z = f72
+POW_e3 = f73
+POW_Z2 = f75
+
+POW_W1 = f77
+POW_Z3 = f80
+
+POW_Z3sq = f85
+
+POW_Nfloat = f87
+POW_f3 = f89
+POW_q = f90
+
+POW_T1 = f96
+POW_T2 = f97
+POW_2M = f98
+POW_s = f99
+POW_f12 = f100
+
+POW_ssq = f101
+POW_T1T2 = f102
+POW_1ps = f103
+POW_A = f104
+POW_es = f105
+
+POW_Xp1 = f106
+POW_int_K = f107
+POW_K = f108
+POW_f123 = f109
+POW_Gpr = f110
+
+POW_Y_Gpr = f111
+POW_int_Y = f112
+POW_2Mqp1 = f113
+
+POW_float_int_Y = f116
+POW_ftz_urm_f8 = f117
+POW_wre_urm_f8 = f118
+POW_big_neg = f119
+POW_big_pos = f120
+
+// Data tables
+//==============================================================
+
+RODATA
+
+.align 16
+
+LOCAL_OBJECT_START(pow_table_P)
+data8 0x80000000000018E5, 0x0000BFFD // P_1 (approx -1/4); NOTE(review): two-word entries look like significand + sign/exponent - confirm against the loads
+data8 0xb8aa3b295c17f0bc, 0x00004006 // inv_ln2_by_128 (approx 128/ln2)
+// The next four entries are single 8-byte words holding IEEE double bit
+// patterns; the Pad word keeps Q_2/Q_1 in separate 16-byte slots.
+data8 0x3FA5555555554A9E // Q_2 (approx 1/24)
+data8 0x0000000000000000 // Pad
+data8 0x3FC5555555554733 // Q_1 (approx 1/6)
+data8 0x43e8000000000000 // Right shift constant for exp (1.5 * 2^63 as a double)
+data8 0xc9e3b39803f2f6af, 0x00003fb7 // ln2_by_128_lo (low-order correction to ln2_by_128_hi)
+LOCAL_OBJECT_END(pow_table_P)
+
+LOCAL_OBJECT_START(pow_table_Q)
+data8 0xCCCCCCCC4ED2BA7F, 0x00003FFC // P_2 (approx 1/5), significand + sign/exponent words
+data8 0xAAAAAAAAAAAAB505, 0x00003FFD // P_0 (approx 1/3), significand + sign/exponent words
+data8 0x3fe62e42fefa39e8, 0x3cccd5e4f1d9cc02 // log2 hi lo = +6.93147e-001 (two doubles: log2_hi, then log2_lo)
+data8 0xb17217f7d1cf79ab, 0x00003ff7 // ln2_by_128_hi (approx ln2/128), significand + sign/exponent words
+LOCAL_OBJECT_END(pow_table_Q)
+
+
+LOCAL_OBJECT_START(pow_Tt)
+data8 0x3f60040155d58800 // log(1/frcpa(1+0/256))= +1.95503e-003
+data8 0x3f78121214586a00 // log(1/frcpa(1+1/256))= +5.87661e-003
+data8 0x3f841929f9683200 // log(1/frcpa(1+2/256))= +9.81362e-003
+data8 0x3f8c317384c75f00 // log(1/frcpa(1+3/256))= +1.37662e-002
+data8 0x3f91a6b91ac73380 // log(1/frcpa(1+4/256))= +1.72376e-002
+data8 0x3f95ba9a5d9ac000 // log(1/frcpa(1+5/256))= +2.12196e-002
+data8 0x3f99d2a807432580 // log(1/frcpa(1+6/256))= +2.52177e-002
+data8 0x3f9d6b2725979800 // log(1/frcpa(1+7/256))= +2.87291e-002
+data8 0x3fa0c58fa19dfa80 // log(1/frcpa(1+8/256))= +3.27573e-002
+data8 0x3fa2954c78cbce00 // log(1/frcpa(1+9/256))= +3.62953e-002
+data8 0x3fa4a94d2da96c40 // log(1/frcpa(1+10/256))= +4.03542e-002
+data8 0x3fa67c94f2d4bb40 // log(1/frcpa(1+11/256))= +4.39192e-002
+data8 0x3fa85188b630f040 // log(1/frcpa(1+12/256))= +4.74971e-002
+data8 0x3faa6b8abe73af40 // log(1/frcpa(1+13/256))= +5.16017e-002
+data8 0x3fac441e06f72a80 // log(1/frcpa(1+14/256))= +5.52072e-002
+data8 0x3fae1e6713606d00 // log(1/frcpa(1+15/256))= +5.88257e-002
+data8 0x3faffa6911ab9300 // log(1/frcpa(1+16/256))= +6.24574e-002
+data8 0x3fb0ec139c5da600 // log(1/frcpa(1+17/256))= +6.61022e-002
+data8 0x3fb1dbd2643d1900 // log(1/frcpa(1+18/256))= +6.97605e-002
+data8 0x3fb2cc7284fe5f00 // log(1/frcpa(1+19/256))= +7.34321e-002
+data8 0x3fb3bdf5a7d1ee60 // log(1/frcpa(1+20/256))= +7.71173e-002
+data8 0x3fb4b05d7aa012e0 // log(1/frcpa(1+21/256))= +8.08161e-002
+data8 0x3fb580db7ceb5700 // log(1/frcpa(1+22/256))= +8.39975e-002
+data8 0x3fb674f089365a60 // log(1/frcpa(1+23/256))= +8.77219e-002
+data8 0x3fb769ef2c6b5680 // log(1/frcpa(1+24/256))= +9.14602e-002
+data8 0x3fb85fd927506a40 // log(1/frcpa(1+25/256))= +9.52125e-002
+data8 0x3fb9335e5d594980 // log(1/frcpa(1+26/256))= +9.84401e-002
+data8 0x3fba2b0220c8e5e0 // log(1/frcpa(1+27/256))= +1.02219e-001
+data8 0x3fbb0004ac1a86a0 // log(1/frcpa(1+28/256))= +1.05469e-001
+data8 0x3fbbf968769fca00 // log(1/frcpa(1+29/256))= +1.09274e-001
+data8 0x3fbccfedbfee13a0 // log(1/frcpa(1+30/256))= +1.12548e-001
+data8 0x3fbda727638446a0 // log(1/frcpa(1+31/256))= +1.15832e-001
+data8 0x3fbea3257fe10f60 // log(1/frcpa(1+32/256))= +1.19677e-001
+data8 0x3fbf7be9fedbfde0 // log(1/frcpa(1+33/256))= +1.22985e-001
+data8 0x3fc02ab352ff25f0 // log(1/frcpa(1+34/256))= +1.26303e-001
+data8 0x3fc097ce579d2040 // log(1/frcpa(1+35/256))= +1.29633e-001
+data8 0x3fc1178e8227e470 // log(1/frcpa(1+36/256))= +1.33531e-001
+data8 0x3fc185747dbecf30 // log(1/frcpa(1+37/256))= +1.36885e-001
+data8 0x3fc1f3b925f25d40 // log(1/frcpa(1+38/256))= +1.40250e-001
+data8 0x3fc2625d1e6ddf50 // log(1/frcpa(1+39/256))= +1.43627e-001
+data8 0x3fc2d1610c868130 // log(1/frcpa(1+40/256))= +1.47015e-001
+data8 0x3fc340c597411420 // log(1/frcpa(1+41/256))= +1.50414e-001
+data8 0x3fc3b08b6757f2a0 // log(1/frcpa(1+42/256))= +1.53825e-001
+data8 0x3fc40dfb08378000 // log(1/frcpa(1+43/256))= +1.56677e-001
+data8 0x3fc47e74e8ca5f70 // log(1/frcpa(1+44/256))= +1.60109e-001
+data8 0x3fc4ef51f6466de0 // log(1/frcpa(1+45/256))= +1.63553e-001
+data8 0x3fc56092e02ba510 // log(1/frcpa(1+46/256))= +1.67010e-001
+data8 0x3fc5d23857cd74d0 // log(1/frcpa(1+47/256))= +1.70478e-001
+data8 0x3fc6313a37335d70 // log(1/frcpa(1+48/256))= +1.73377e-001
+data8 0x3fc6a399dabbd380 // log(1/frcpa(1+49/256))= +1.76868e-001
+data8 0x3fc70337dd3ce410 // log(1/frcpa(1+50/256))= +1.79786e-001
+data8 0x3fc77654128f6120 // log(1/frcpa(1+51/256))= +1.83299e-001
+data8 0x3fc7e9d82a0b0220 // log(1/frcpa(1+52/256))= +1.86824e-001
+data8 0x3fc84a6b759f5120 // log(1/frcpa(1+53/256))= +1.89771e-001
+data8 0x3fc8ab47d5f5a300 // log(1/frcpa(1+54/256))= +1.92727e-001
+data8 0x3fc91fe490965810 // log(1/frcpa(1+55/256))= +1.96286e-001
+data8 0x3fc981634011aa70 // log(1/frcpa(1+56/256))= +1.99261e-001
+data8 0x3fc9f6c407089660 // log(1/frcpa(1+57/256))= +2.02843e-001
+data8 0x3fca58e729348f40 // log(1/frcpa(1+58/256))= +2.05838e-001
+data8 0x3fcabb55c31693a0 // log(1/frcpa(1+59/256))= +2.08842e-001
+data8 0x3fcb1e104919efd0 // log(1/frcpa(1+60/256))= +2.11855e-001
+data8 0x3fcb94ee93e367c0 // log(1/frcpa(1+61/256))= +2.15483e-001
+data8 0x3fcbf851c0675550 // log(1/frcpa(1+62/256))= +2.18516e-001
+data8 0x3fcc5c0254bf23a0 // log(1/frcpa(1+63/256))= +2.21558e-001
+data8 0x3fccc000c9db3c50 // log(1/frcpa(1+64/256))= +2.24609e-001
+data8 0x3fcd244d99c85670 // log(1/frcpa(1+65/256))= +2.27670e-001
+data8 0x3fcd88e93fb2f450 // log(1/frcpa(1+66/256))= +2.30741e-001
+data8 0x3fcdedd437eaef00 // log(1/frcpa(1+67/256))= +2.33820e-001
+data8 0x3fce530effe71010 // log(1/frcpa(1+68/256))= +2.36910e-001
+data8 0x3fceb89a1648b970 // log(1/frcpa(1+69/256))= +2.40009e-001
+data8 0x3fcf1e75fadf9bd0 // log(1/frcpa(1+70/256))= +2.43117e-001
+data8 0x3fcf84a32ead7c30 // log(1/frcpa(1+71/256))= +2.46235e-001
+data8 0x3fcfeb2233ea07c0 // log(1/frcpa(1+72/256))= +2.49363e-001
+data8 0x3fd028f9c7035c18 // log(1/frcpa(1+73/256))= +2.52501e-001
+data8 0x3fd05c8be0d96358 // log(1/frcpa(1+74/256))= +2.55649e-001
+data8 0x3fd085eb8f8ae790 // log(1/frcpa(1+75/256))= +2.58174e-001
+data8 0x3fd0b9c8e32d1910 // log(1/frcpa(1+76/256))= +2.61339e-001
+data8 0x3fd0edd060b78080 // log(1/frcpa(1+77/256))= +2.64515e-001
+data8 0x3fd122024cf00638 // log(1/frcpa(1+78/256))= +2.67701e-001
+data8 0x3fd14be2927aecd0 // log(1/frcpa(1+79/256))= +2.70257e-001
+data8 0x3fd180618ef18ad8 // log(1/frcpa(1+80/256))= +2.73461e-001
+data8 0x3fd1b50bbe2fc638 // log(1/frcpa(1+81/256))= +2.76675e-001
+data8 0x3fd1df4cc7cf2428 // log(1/frcpa(1+82/256))= +2.79254e-001
+data8 0x3fd214456d0eb8d0 // log(1/frcpa(1+83/256))= +2.82487e-001
+data8 0x3fd23ec5991eba48 // log(1/frcpa(1+84/256))= +2.85081e-001
+data8 0x3fd2740d9f870af8 // log(1/frcpa(1+85/256))= +2.88333e-001
+data8 0x3fd29ecdabcdfa00 // log(1/frcpa(1+86/256))= +2.90943e-001
+data8 0x3fd2d46602adcce8 // log(1/frcpa(1+87/256))= +2.94214e-001
+data8 0x3fd2ff66b04ea9d0 // log(1/frcpa(1+88/256))= +2.96838e-001
+data8 0x3fd335504b355a30 // log(1/frcpa(1+89/256))= +3.00129e-001
+data8 0x3fd360925ec44f58 // log(1/frcpa(1+90/256))= +3.02769e-001
+data8 0x3fd38bf1c3337e70 // log(1/frcpa(1+91/256))= +3.05417e-001
+data8 0x3fd3c25277333180 // log(1/frcpa(1+92/256))= +3.08735e-001
+data8 0x3fd3edf463c16838 // log(1/frcpa(1+93/256))= +3.11399e-001
+data8 0x3fd419b423d5e8c0 // log(1/frcpa(1+94/256))= +3.14069e-001
+data8 0x3fd44591e0539f48 // log(1/frcpa(1+95/256))= +3.16746e-001
+data8 0x3fd47c9175b6f0a8 // log(1/frcpa(1+96/256))= +3.20103e-001
+data8 0x3fd4a8b341552b08 // log(1/frcpa(1+97/256))= +3.22797e-001
+data8 0x3fd4d4f390890198 // log(1/frcpa(1+98/256))= +3.25498e-001
+data8 0x3fd501528da1f960 // log(1/frcpa(1+99/256))= +3.28206e-001
+data8 0x3fd52dd06347d4f0 // log(1/frcpa(1+100/256))= +3.30921e-001
+data8 0x3fd55a6d3c7b8a88 // log(1/frcpa(1+101/256))= +3.33644e-001
+data8 0x3fd5925d2b112a58 // log(1/frcpa(1+102/256))= +3.37058e-001
+data8 0x3fd5bf406b543db0 // log(1/frcpa(1+103/256))= +3.39798e-001
+data8 0x3fd5ec433d5c35a8 // log(1/frcpa(1+104/256))= +3.42545e-001
+data8 0x3fd61965cdb02c18 // log(1/frcpa(1+105/256))= +3.45300e-001
+data8 0x3fd646a84935b2a0 // log(1/frcpa(1+106/256))= +3.48063e-001
+data8 0x3fd6740add31de90 // log(1/frcpa(1+107/256))= +3.50833e-001
+data8 0x3fd6a18db74a58c0 // log(1/frcpa(1+108/256))= +3.53610e-001
+data8 0x3fd6cf31058670e8 // log(1/frcpa(1+109/256))= +3.56396e-001
+data8 0x3fd6f180e852f0b8 // log(1/frcpa(1+110/256))= +3.58490e-001
+data8 0x3fd71f5d71b894e8 // log(1/frcpa(1+111/256))= +3.61289e-001
+data8 0x3fd74d5aefd66d58 // log(1/frcpa(1+112/256))= +3.64096e-001
+data8 0x3fd77b79922bd378 // log(1/frcpa(1+113/256))= +3.66911e-001
+data8 0x3fd7a9b9889f19e0 // log(1/frcpa(1+114/256))= +3.69734e-001
+data8 0x3fd7d81b037eb6a0 // log(1/frcpa(1+115/256))= +3.72565e-001
+data8 0x3fd8069e33827230 // log(1/frcpa(1+116/256))= +3.75404e-001
+data8 0x3fd82996d3ef8bc8 // log(1/frcpa(1+117/256))= +3.77538e-001
+data8 0x3fd85855776dcbf8 // log(1/frcpa(1+118/256))= +3.80391e-001
+data8 0x3fd8873658327cc8 // log(1/frcpa(1+119/256))= +3.83253e-001
+data8 0x3fd8aa75973ab8c8 // log(1/frcpa(1+120/256))= +3.85404e-001
+data8 0x3fd8d992dc8824e0 // log(1/frcpa(1+121/256))= +3.88280e-001
+data8 0x3fd908d2ea7d9510 // log(1/frcpa(1+122/256))= +3.91164e-001
+data8 0x3fd92c59e79c0e50 // log(1/frcpa(1+123/256))= +3.93332e-001
+data8 0x3fd95bd750ee3ed0 // log(1/frcpa(1+124/256))= +3.96231e-001
+data8 0x3fd98b7811a3ee58 // log(1/frcpa(1+125/256))= +3.99138e-001
+data8 0x3fd9af47f33d4068 // log(1/frcpa(1+126/256))= +4.01323e-001
+data8 0x3fd9df270c1914a0 // log(1/frcpa(1+127/256))= +4.04245e-001
+data8 0x3fda0325ed14fda0 // log(1/frcpa(1+128/256))= +4.06442e-001
+data8 0x3fda33440224fa78 // log(1/frcpa(1+129/256))= +4.09379e-001
+data8 0x3fda57725e80c380 // log(1/frcpa(1+130/256))= +4.11587e-001
+data8 0x3fda87d0165dd198 // log(1/frcpa(1+131/256))= +4.14539e-001
+data8 0x3fdaac2e6c03f890 // log(1/frcpa(1+132/256))= +4.16759e-001
+data8 0x3fdadccc6fdf6a80 // log(1/frcpa(1+133/256))= +4.19726e-001
+data8 0x3fdb015b3eb1e790 // log(1/frcpa(1+134/256))= +4.21958e-001
+data8 0x3fdb323a3a635948 // log(1/frcpa(1+135/256))= +4.24941e-001
+data8 0x3fdb56fa04462908 // log(1/frcpa(1+136/256))= +4.27184e-001
+data8 0x3fdb881aa659bc90 // log(1/frcpa(1+137/256))= +4.30182e-001
+data8 0x3fdbad0bef3db160 // log(1/frcpa(1+138/256))= +4.32437e-001
+data8 0x3fdbd21297781c28 // log(1/frcpa(1+139/256))= +4.34697e-001
+data8 0x3fdc039236f08818 // log(1/frcpa(1+140/256))= +4.37718e-001
+data8 0x3fdc28cb1e4d32f8 // log(1/frcpa(1+141/256))= +4.39990e-001
+data8 0x3fdc4e19b84723c0 // log(1/frcpa(1+142/256))= +4.42267e-001
+data8 0x3fdc7ff9c74554c8 // log(1/frcpa(1+143/256))= +4.45311e-001
+data8 0x3fdca57b64e9db00 // log(1/frcpa(1+144/256))= +4.47600e-001
+data8 0x3fdccb130a5ceba8 // log(1/frcpa(1+145/256))= +4.49895e-001
+data8 0x3fdcf0c0d18f3268 // log(1/frcpa(1+146/256))= +4.52194e-001
+data8 0x3fdd232075b5a200 // log(1/frcpa(1+147/256))= +4.55269e-001
+data8 0x3fdd490246defa68 // log(1/frcpa(1+148/256))= +4.57581e-001
+data8 0x3fdd6efa918d25c8 // log(1/frcpa(1+149/256))= +4.59899e-001
+data8 0x3fdd9509707ae528 // log(1/frcpa(1+150/256))= +4.62221e-001
+data8 0x3fddbb2efe92c550 // log(1/frcpa(1+151/256))= +4.64550e-001
+data8 0x3fddee2f3445e4a8 // log(1/frcpa(1+152/256))= +4.67663e-001
+data8 0x3fde148a1a2726c8 // log(1/frcpa(1+153/256))= +4.70004e-001
+data8 0x3fde3afc0a49ff38 // log(1/frcpa(1+154/256))= +4.72350e-001
+data8 0x3fde6185206d5168 // log(1/frcpa(1+155/256))= +4.74702e-001
+data8 0x3fde882578823d50 // log(1/frcpa(1+156/256))= +4.77060e-001
+data8 0x3fdeaedd2eac9908 // log(1/frcpa(1+157/256))= +4.79423e-001
+data8 0x3fded5ac5f436be0 // log(1/frcpa(1+158/256))= +4.81792e-001
+data8 0x3fdefc9326d16ab8 // log(1/frcpa(1+159/256))= +4.84166e-001
+data8 0x3fdf2391a21575f8 // log(1/frcpa(1+160/256))= +4.86546e-001
+data8 0x3fdf4aa7ee031928 // log(1/frcpa(1+161/256))= +4.88932e-001
+data8 0x3fdf71d627c30bb0 // log(1/frcpa(1+162/256))= +4.91323e-001
+data8 0x3fdf991c6cb3b378 // log(1/frcpa(1+163/256))= +4.93720e-001
+data8 0x3fdfc07ada69a908 // log(1/frcpa(1+164/256))= +4.96123e-001
+data8 0x3fdfe7f18eb03d38 // log(1/frcpa(1+165/256))= +4.98532e-001
+data8 0x3fe007c053c5002c // log(1/frcpa(1+166/256))= +5.00946e-001
+data8 0x3fe01b942198a5a0 // log(1/frcpa(1+167/256))= +5.03367e-001
+data8 0x3fe02f74400c64e8 // log(1/frcpa(1+168/256))= +5.05793e-001
+data8 0x3fe04360be7603ac // log(1/frcpa(1+169/256))= +5.08225e-001
+data8 0x3fe05759ac47fe30 // log(1/frcpa(1+170/256))= +5.10663e-001
+data8 0x3fe06b5f1911cf50 // log(1/frcpa(1+171/256))= +5.13107e-001
+data8 0x3fe078bf0533c568 // log(1/frcpa(1+172/256))= +5.14740e-001
+data8 0x3fe08cd9687e7b0c // log(1/frcpa(1+173/256))= +5.17194e-001
+data8 0x3fe0a10074cf9018 // log(1/frcpa(1+174/256))= +5.19654e-001
+data8 0x3fe0b5343a234474 // log(1/frcpa(1+175/256))= +5.22120e-001
+data8 0x3fe0c974c89431cc // log(1/frcpa(1+176/256))= +5.24592e-001
+data8 0x3fe0ddc2305b9884 // log(1/frcpa(1+177/256))= +5.27070e-001
+data8 0x3fe0eb524bafc918 // log(1/frcpa(1+178/256))= +5.28726e-001
+data8 0x3fe0ffb54213a474 // log(1/frcpa(1+179/256))= +5.31214e-001
+data8 0x3fe114253da97d9c // log(1/frcpa(1+180/256))= +5.33709e-001
+data8 0x3fe128a24f1d9afc // log(1/frcpa(1+181/256))= +5.36210e-001
+data8 0x3fe1365252bf0864 // log(1/frcpa(1+182/256))= +5.37881e-001
+data8 0x3fe14ae558b4a92c // log(1/frcpa(1+183/256))= +5.40393e-001
+data8 0x3fe15f85a19c7658 // log(1/frcpa(1+184/256))= +5.42910e-001
+data8 0x3fe16d4d38c119f8 // log(1/frcpa(1+185/256))= +5.44592e-001
+data8 0x3fe18203c20dd130 // log(1/frcpa(1+186/256))= +5.47121e-001
+data8 0x3fe196c7bc4b1f38 // log(1/frcpa(1+187/256))= +5.49656e-001
+data8 0x3fe1a4a738b7a33c // log(1/frcpa(1+188/256))= +5.51349e-001
+data8 0x3fe1b981c0c9653c // log(1/frcpa(1+189/256))= +5.53895e-001
+data8 0x3fe1ce69e8bb1068 // log(1/frcpa(1+190/256))= +5.56447e-001
+data8 0x3fe1dc619de06944 // log(1/frcpa(1+191/256))= +5.58152e-001
+data8 0x3fe1f160a2ad0da0 // log(1/frcpa(1+192/256))= +5.60715e-001
+data8 0x3fe2066d7740737c // log(1/frcpa(1+193/256))= +5.63285e-001
+data8 0x3fe2147dba47a390 // log(1/frcpa(1+194/256))= +5.65001e-001
+data8 0x3fe229a1bc5ebac0 // log(1/frcpa(1+195/256))= +5.67582e-001
+data8 0x3fe237c1841a502c // log(1/frcpa(1+196/256))= +5.69306e-001
+data8 0x3fe24cfce6f80d98 // log(1/frcpa(1+197/256))= +5.71898e-001
+data8 0x3fe25b2c55cd5760 // log(1/frcpa(1+198/256))= +5.73630e-001
+data8 0x3fe2707f4d5f7c40 // log(1/frcpa(1+199/256))= +5.76233e-001
+data8 0x3fe285e0842ca380 // log(1/frcpa(1+200/256))= +5.78842e-001
+data8 0x3fe294294708b770 // log(1/frcpa(1+201/256))= +5.80586e-001
+data8 0x3fe2a9a2670aff0c // log(1/frcpa(1+202/256))= +5.83207e-001
+data8 0x3fe2b7fb2c8d1cc0 // log(1/frcpa(1+203/256))= +5.84959e-001
+data8 0x3fe2c65a6395f5f4 // log(1/frcpa(1+204/256))= +5.86713e-001
+data8 0x3fe2dbf557b0df40 // log(1/frcpa(1+205/256))= +5.89350e-001
+data8 0x3fe2ea64c3f97654 // log(1/frcpa(1+206/256))= +5.91113e-001
+data8 0x3fe3001823684d70 // log(1/frcpa(1+207/256))= +5.93762e-001
+data8 0x3fe30e97e9a8b5cc // log(1/frcpa(1+208/256))= +5.95531e-001
+data8 0x3fe32463ebdd34e8 // log(1/frcpa(1+209/256))= +5.98192e-001
+data8 0x3fe332f4314ad794 // log(1/frcpa(1+210/256))= +5.99970e-001
+data8 0x3fe348d90e7464cc // log(1/frcpa(1+211/256))= +6.02643e-001
+data8 0x3fe35779f8c43d6c // log(1/frcpa(1+212/256))= +6.04428e-001
+data8 0x3fe36621961a6a98 // log(1/frcpa(1+213/256))= +6.06217e-001
+data8 0x3fe37c299f3c3668 // log(1/frcpa(1+214/256))= +6.08907e-001
+data8 0x3fe38ae2171976e4 // log(1/frcpa(1+215/256))= +6.10704e-001
+data8 0x3fe399a157a603e4 // log(1/frcpa(1+216/256))= +6.12504e-001
+data8 0x3fe3afccfe77b9d0 // log(1/frcpa(1+217/256))= +6.15210e-001
+data8 0x3fe3be9d503533b4 // log(1/frcpa(1+218/256))= +6.17018e-001
+data8 0x3fe3cd7480b4a8a0 // log(1/frcpa(1+219/256))= +6.18830e-001
+data8 0x3fe3e3c43918f76c // log(1/frcpa(1+220/256))= +6.21554e-001
+data8 0x3fe3f2acb27ed6c4 // log(1/frcpa(1+221/256))= +6.23373e-001
+data8 0x3fe4019c2125ca90 // log(1/frcpa(1+222/256))= +6.25197e-001
+data8 0x3fe4181061389720 // log(1/frcpa(1+223/256))= +6.27937e-001
+data8 0x3fe42711518df544 // log(1/frcpa(1+224/256))= +6.29769e-001
+data8 0x3fe436194e12b6bc // log(1/frcpa(1+225/256))= +6.31604e-001
+data8 0x3fe445285d68ea68 // log(1/frcpa(1+226/256))= +6.33442e-001
+data8 0x3fe45bcc464c8938 // log(1/frcpa(1+227/256))= +6.36206e-001
+data8 0x3fe46aed21f117fc // log(1/frcpa(1+228/256))= +6.38053e-001
+data8 0x3fe47a1527e8a2d0 // log(1/frcpa(1+229/256))= +6.39903e-001
+data8 0x3fe489445efffcc8 // log(1/frcpa(1+230/256))= +6.41756e-001
+data8 0x3fe4a018bcb69834 // log(1/frcpa(1+231/256))= +6.44543e-001
+data8 0x3fe4af5a0c9d65d4 // log(1/frcpa(1+232/256))= +6.46405e-001
+data8 0x3fe4bea2a5bdbe84 // log(1/frcpa(1+233/256))= +6.48271e-001
+data8 0x3fe4cdf28f10ac44 // log(1/frcpa(1+234/256))= +6.50140e-001
+data8 0x3fe4dd49cf994058 // log(1/frcpa(1+235/256))= +6.52013e-001
+data8 0x3fe4eca86e64a680 // log(1/frcpa(1+236/256))= +6.53889e-001
+data8 0x3fe503c43cd8eb68 // log(1/frcpa(1+237/256))= +6.56710e-001
+data8 0x3fe513356667fc54 // log(1/frcpa(1+238/256))= +6.58595e-001
+data8 0x3fe522ae0738a3d4 // log(1/frcpa(1+239/256))= +6.60483e-001
+data8 0x3fe5322e26867854 // log(1/frcpa(1+240/256))= +6.62376e-001
+data8 0x3fe541b5cb979808 // log(1/frcpa(1+241/256))= +6.64271e-001
+data8 0x3fe55144fdbcbd60 // log(1/frcpa(1+242/256))= +6.66171e-001
+data8 0x3fe560dbc45153c4 // log(1/frcpa(1+243/256))= +6.68074e-001
+data8 0x3fe5707a26bb8c64 // log(1/frcpa(1+244/256))= +6.69980e-001
+data8 0x3fe587f60ed5b8fc // log(1/frcpa(1+245/256))= +6.72847e-001
+data8 0x3fe597a7977c8f30 // log(1/frcpa(1+246/256))= +6.74763e-001
+data8 0x3fe5a760d634bb88 // log(1/frcpa(1+247/256))= +6.76682e-001
+data8 0x3fe5b721d295f10c // log(1/frcpa(1+248/256))= +6.78605e-001
+data8 0x3fe5c6ea94431ef8 // log(1/frcpa(1+249/256))= +6.80532e-001
+data8 0x3fe5d6bb22ea86f4 // log(1/frcpa(1+250/256))= +6.82462e-001
+data8 0x3fe5e6938645d38c // log(1/frcpa(1+251/256))= +6.84397e-001
+data8 0x3fe5f673c61a2ed0 // log(1/frcpa(1+252/256))= +6.86335e-001
+data8 0x3fe6065bea385924 // log(1/frcpa(1+253/256))= +6.88276e-001
+data8 0x3fe6164bfa7cc068 // log(1/frcpa(1+254/256))= +6.90222e-001
+data8 0x3fe62643fecf9740 // log(1/frcpa(1+255/256))= +6.92171e-001
+LOCAL_OBJECT_END(pow_Tt)
+
+
+// Table 1 is 2^(index_1/128) where index_1 goes from 0 to 15.
+// Each 16-byte entry (significand, then exponent word) is fetched with ldfe.
+LOCAL_OBJECT_START(pow_tbl1)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(pow_tbl1)
+
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+LOCAL_OBJECT_START(pow_tbl2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(pow_tbl2)
+
+.section .text
+GLOBAL_IEEE754_ENTRY(powf)
+
+// Get exponent of x. Will be used to calculate K.
+{ .mfi
+ getf.exp pow_GR_signexp_X = f8
+ fms.s1 POW_Xm1 = f8,f1,f1 // Will be used for r1 if x>0
+ mov pow_GR_17ones = 0x1FFFF
+}
+{ .mfi
+ addl pow_AD_P = @ltoff(pow_table_P), gp
+ fma.s1 POW_Xp1 = f8,f1,f1 // Will be used for r1 if x<0
+ nop.i 999
+}
+;;
+
+// Get significand of x. Will be used to get index to fetch T, Tt.
+{ .mfi
+ getf.sig pow_GR_sig_X = f8
+ frcpa.s1 POW_B, p6 = f1,f8
+ mov pow_GR_exp_half = 0xFFFE // Exponent for 0.5
+}
+{ .mfi
+ ld8 pow_AD_P = [pow_AD_P]
+ fma.s1 POW_NORM_X = f8,f1,f0
+ mov pow_GR_exp_2tom8 = 0xFFF7
+}
+;;
+
+// DOUBLE 0x10033 exponent limit at which y is an integer
+{ .mfi
+ nop.m 999
+ fcmp.lt.s1 p8,p9 = f8, f0 // Test for x<0
+ addl pow_GR_10033 = 0x10033, r0
+}
+{ .mfi
+ mov pow_GR_16ones = 0xFFFF
+ fma.s1 POW_NORM_Y = f9,f1,f0
+ nop.i 999
+}
+;;
+
+// p13 = TRUE ==> X is unorm
+{ .mfi
+ setf.exp POW_Q0_half = pow_GR_exp_half // Form 0.5
+ fclass.m p13,p0 = f8, 0x0b // Test for x unorm
+ adds pow_AD_Tt = pow_Tt - pow_table_P, pow_AD_P
+}
+{ .mfi
+ adds pow_AD_Q = pow_table_Q - pow_table_P, pow_AD_P
+ nop.f 999
+ nop.i 999
+}
+;;
+
+// p14 = TRUE ==> X is ZERO
+{ .mfi
+ ldfe POW_P2 = [pow_AD_Q], 16
+ fclass.m p14,p0 = f8, 0x07
+ nop.i 999
+}
+// Note POW_Xm1 and POW_r1 are used interchangeably
+{ .mfb
+ nop.m 999
+(p8) fnma.s1 POW_Xm1 = POW_Xp1,f1,f0
+(p13) br.cond.spnt POW_X_DENORM
+}
+;;
+
+// Continue normal and denormal paths here
+POW_COMMON:
+// p11 = TRUE ==> Y is a NAN
+{ .mfi
+ and pow_GR_exp_X = pow_GR_signexp_X, pow_GR_17ones
+ fclass.m p11,p0 = f9, 0xc3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fms.s1 POW_r = POW_B, POW_NORM_X,f1
+ mov pow_GR_y_zero = 0
+}
+;;
+
+// Get exponent of |x|-1 to use in comparison to 2^-8
+{ .mmi
+ getf.exp pow_GR_signexp_Xm1 = POW_Xm1
+ sub pow_GR_true_exp_X = pow_GR_exp_X, pow_GR_16ones
+ extr.u pow_GR_offset = pow_GR_sig_X, 55, 8
+}
+;;
+
+{ .mfi
+ alloc r32=ar.pfs,2,19,4,0
+ fcvt.fx.s1 POW_int_Y = POW_NORM_Y
+ shladd pow_AD_Tt = pow_GR_offset, 3, pow_AD_Tt
+}
+{ .mfi
+ setf.sig POW_int_K = pow_GR_true_exp_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+// p12 = TRUE if Y is ZERO
+// Compute xsq to decide later if |x|=1
+{ .mfi
+ ldfe POW_P1 = [pow_AD_P], 16
+ fclass.m p12,p0 = f9, 0x07
+ nop.i 999
+}
+{ .mfb
+ ldfe POW_P0 = [pow_AD_Q], 16
+ fma.s1 POW_xsq = POW_NORM_X, POW_NORM_X, f0
+(p11) br.cond.spnt POW_Y_NAN // Branch if y=nan
+}
+;;
+
+{ .mmf
+ getf.exp pow_GR_signexp_Y = POW_NORM_Y
+ ldfd POW_T = [pow_AD_Tt]
+ fma.s1 POW_rsq = POW_r, POW_r,f0
+}
+;;
+
+// p11 = TRUE ==> X is a NAN
+{ .mfi
+ ldfpd POW_log2_hi, POW_log2_lo = [pow_AD_Q], 16
+ fclass.m p11,p0 = POW_NORM_X, 0xc3
+ nop.i 999
+}
+{ .mfi
+ ldfe POW_inv_log2_by_128 = [pow_AD_P], 16
+ fma.s1 POW_delta = f0,f0,f0 // delta=0 in case |x| near 1
+(p12) mov pow_GR_y_zero = 1
+}
+;;
+
+{ .mfi
+ ldfd POW_Q2 = [pow_AD_P], 16
+ fnma.s1 POW_twoV = POW_r, POW_Q0_half,f1
+ and pow_GR_exp_Xm1 = pow_GR_signexp_Xm1, pow_GR_17ones
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_U = POW_NORM_Y,POW_r,f0
+ nop.i 999
+}
+;;
+
+// Determine if we will use the |x| near 1 path (p6) or normal path (p7)
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_K = POW_int_K
+ cmp.lt p6,p7 = pow_GR_exp_Xm1, pow_GR_exp_2tom8
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_G = f0,f0,f0 // G=0 in case |x| near 1
+(p11) br.cond.spnt POW_X_NAN // Branch if x=nan and y not nan
+}
+;;
+
+// If on the x near 1 path, assign r1 to r
+{ .mfi
+ ldfpd POW_Q1, POW_RSHF = [pow_AD_P], 16
+(p6) fma.s1 POW_r = POW_r1, f1, f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p6) fma.s1 POW_rsq = POW_r1, POW_r1, f0
+(p14) br.cond.spnt POW_X_0 // Branch if x zero and y not nan
+}
+;;
+
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y
+(p6) fnma.s1 POW_twoV = POW_r1, POW_Q0_half,f1
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ andcm pow_GR_sign_Y = pow_GR_signexp_Y, pow_GR_17ones
+(p6) fma.s1 POW_U = POW_NORM_Y,POW_r1,f0
+(p12) br.cond.spnt POW_Y_0 // Branch if y=zero, x not zero or nan
+}
+;;
+
+{ .mfi
+ ldfe POW_log2_by_128_lo = [pow_AD_P], 16
+(p7) fma.s1 POW_Z2 = POW_twoV, POW_U, f0
+ nop.i 999
+}
+{ .mfi
+ ldfe POW_log2_by_128_hi = [pow_AD_Q], 16
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcvt.xf POW_float_int_Y = POW_int_Y
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_G = POW_K, POW_log2_hi, POW_T
+ adds pow_AD_tbl1 = pow_tbl1 - pow_Tt, pow_AD_Q
+}
+;;
+
+// p11 = TRUE ==> X is NEGATIVE but not inf
+{ .mfi
+ nop.m 999
+ fclass.m p11,p0 = POW_NORM_X, 0x1a
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_delta = POW_K, POW_log2_lo, f0
+ adds pow_AD_tbl2 = pow_tbl2 - pow_tbl1, pow_AD_tbl1
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fma.s1 POW_Z = POW_twoV, POW_U, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_P1, POW_r, POW_P0
+ nop.i 999
+}
+;;
+
+// p11 = TRUE ==> X is NEGATIVE but not inf
+// p12 = TRUE ==> X is NEGATIVE AND Y already even int
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+{ .mfi
+ nop.m 999
+(p7) fma.s1 POW_Z = POW_NORM_Y, POW_G, POW_Z2
+(p11) cmp.gt.unc p12,p13 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Gpr = POW_G, f1, POW_r
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Yrcub = POW_rsq, POW_U, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_p = POW_rsq, POW_P2, POW_v2
+ nop.i 999
+}
+;;
+
+// Test if x inf
+{ .mfi
+ nop.m 999
+ fclass.m p15,p0 = POW_NORM_X, 0x23
+ nop.i 999
+}
+// By adding RSHF (1.1000...*2^63) we put integer part in rightmost significand
+{ .mfi
+ nop.m 999
+ fma.s1 POW_W1 = POW_Z, POW_inv_log2_by_128, POW_RSHF
+ nop.i 999
+}
+;;
+
+// p13 = TRUE ==> X is NEGATIVE AND Y possible int
+// p10 = TRUE ==> X is NEG and Y is an int
+// p12 = TRUE ==> X is NEG and Y is not an int
+{ .mfi
+ nop.m 999
+(p13) fcmp.eq.unc.s1 p10,p12 = POW_float_int_Y, POW_NORM_Y
+ mov pow_GR_xneg_yodd = 0
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_Y_Gpr = POW_NORM_Y, POW_Gpr, f0
+ nop.i 999
+}
+;;
+
+// p11 = TRUE ==> X is +1.0
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p11,p0 = POW_NORM_X, f1
+ nop.i 999
+}
+;;
+
+// Extract rounded integer from rightmost significand of POW_W1
+// By subtracting RSHF we get rounded integer POW_Nfloat
+{ .mfi
+ getf.sig pow_GR_int_N = POW_W1
+ fms.s1 POW_Nfloat = POW_W1, f1, POW_RSHF
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_Z3 = POW_p, POW_Yrcub, f0
+(p12) br.cond.spnt POW_X_NEG_Y_NONINT // Branch if x neg, y not integer
+}
+;;
+
+// p7 = TRUE ==> Y is +1.0
+// p12 = TRUE ==> X is NEGATIVE AND Y is an odd integer
+{ .mfi
+ getf.exp pow_GR_signexp_Y_Gpr = POW_Y_Gpr
+ fcmp.eq.s1 p7,p0 = POW_NORM_Y, f1 // Test for y=1.0
+(p10) tbit.nz.unc p12,p0 = pow_GR_sig_int_Y,0
+}
+{ .mfb
+ nop.m 999
+(p11) fma.s.s0 f8 = f1,f1,f0 // If x=1, result is +1
+(p15) br.cond.spnt POW_X_INF
+}
+;;
+
+// Test x and y and flag denormal
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p15,p0 = f8,f9
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s1 POW_e3 = POW_NORM_Y, POW_delta, f0
+(p11) br.ret.spnt b0 // Early exit if x=1.0, result is +1
+}
+;;
+
+{ .mfi
+(p12) mov pow_GR_xneg_yodd = 1
+ fnma.s1 POW_f12 = POW_Nfloat, POW_log2_by_128_lo, f1
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fnma.s1 POW_s = POW_Nfloat, POW_log2_by_128_hi, POW_Z
+(p7) br.ret.spnt b0 // Early exit if y=1.0, result is x
+}
+;;
+
+{ .mmi
+ and pow_GR_index1 = 0x0f, pow_GR_int_N
+ and pow_GR_index2 = 0x70, pow_GR_int_N
+ shr pow_int_GR_M = pow_GR_int_N, 7 // M = N/128
+}
+;;
+
+{ .mfi
+ shladd pow_AD_T1 = pow_GR_index1, 4, pow_AD_tbl1
+ fma.s1 POW_q = POW_Z3, POW_Q1, POW_Q0_half
+ add pow_int_GR_M = pow_GR_16ones, pow_int_GR_M
+}
+{ .mfi
+ add pow_AD_T2 = pow_AD_tbl2, pow_GR_index2
+ fma.s1 POW_Z3sq = POW_Z3, POW_Z3, f0
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ldfe POW_T1 = [pow_AD_T1]
+ ldfe POW_T2 = [pow_AD_T2]
+ nop.i 999
+}
+;;
+
+// f123 = f12*(e3+1) = f12*e3+f12
+{ .mfi
+ setf.exp POW_2M = pow_int_GR_M
+ fma.s1 POW_f123 = POW_e3,POW_f12,POW_f12
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_ssq = POW_s, POW_s, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v2 = POW_s, POW_Q2, POW_Q1
+ and pow_GR_exp_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+{ .mfi
+ cmp.ne p12,p13 = pow_GR_xneg_yodd, r0
+ fma.s1 POW_q = POW_Z3sq, POW_q, POW_Z3
+ sub pow_GR_true_exp_Y_Gpr = pow_GR_exp_Y_Gpr, pow_GR_16ones
+}
+;;
+
+// p8 TRUE ==> |Y(G + r)| >= 2^7
+
+// single
+// -2^7 -2^6 2^6 2^7
+// -----+-----+----+ ... +-----+-----+-----
+// p8 | p9 | p8
+// | | p10 | |
+
+// Form signexp of constants to indicate overflow
+{ .mfi
+ mov pow_GR_big_pos = 0x1007f
+ nop.f 999
+ cmp.le p8,p9 = 7, pow_GR_true_exp_Y_Gpr
+}
+{ .mfi
+ mov pow_GR_big_neg = 0x3007f
+ nop.f 999
+ andcm pow_GR_sign_Y_Gpr = pow_GR_signexp_Y_Gpr, pow_GR_17ones
+}
+;;
+
+// Form big positive and negative constants to test for possible overflow
+// Scale both terms of the polynomial by POW_f123
+{ .mfi
+ setf.exp POW_big_pos = pow_GR_big_pos
+ fma.s1 POW_ssq = POW_ssq, POW_f123, f0
+(p9) cmp.le.unc p0,p10 = 6, pow_GR_true_exp_Y_Gpr
+}
+{ .mfb
+ setf.exp POW_big_neg = pow_GR_big_neg
+ fma.s1 POW_1ps = POW_s, POW_f123, POW_f123
+(p8) br.cond.spnt POW_OVER_UNDER_X_NOT_INF
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fnma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fma.s1 POW_T1T2 = POW_T1, POW_T2, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_v210 = POW_s, POW_v2, POW_Q0_half
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_2Mqp1 = POW_2M, POW_q, POW_2M
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 POW_es = POW_ssq, POW_v210, POW_1ps
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 POW_A = POW_T1T2, POW_2Mqp1, f0
+ nop.i 999
+}
+;;
+
+// Dummy op to set inexact
+{ .mfi
+ nop.m 999
+ fma.s0 POW_tmp = POW_2M, POW_q, POW_2M
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = POW_A, POW_es, f0
+(p10) br.ret.sptk b0 // Exit main branch if no over/underflow
+}
+;;
+
+// POSSIBLE_OVER_UNDER
+// p6 = TRUE ==> Y_Gpr negative
+// Result is already computed. We just need to know if over/underflow occurred.
+
+{ .mfb
+ cmp.eq p0,p6 = pow_GR_sign_Y_Gpr, r0
+ nop.f 999
+(p6) br.cond.spnt POW_POSSIBLE_UNDER
+}
+;;
+
+// POSSIBLE_OVER
+// We got an answer.
+// overflow is a possibility, not a certainty
+
+
+// We define an overflow when the answer with
+// WRE set
+// user-defined rounding mode
+
+// double
+// Largest double is 7FE (biased double)
+// 7FE - 3FF + FFFF = 103FE
+// Create + largest_double_plus_ulp
+// Create - largest_double_plus_ulp
+// Calculate answer with WRE set.
+
+// single
+// Largest single is FE (biased single)
+// FE - 7F + FFFF = 1007E
+// Create + largest_single_plus_ulp
+// Create - largest_single_plus_ulp
+// Calculate answer with WRE set.
+
+// Cases when answer is ldn+1 are as follows:
+// ldn ldn+1
+// --+----------|----------+------------
+// |
+// +inf +inf -inf
+// RN RN
+// RZ
+
+// Put in s2 (td set, wre set)
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x42
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s.s2 POW_wre_urm_f8 = POW_A, POW_es, f0
+ nop.i 999
+}
+;;
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+// p7 = TRUE ==> yes, we have an overflow
+{ .mfi
+ nop.m 999
+ fcmp.ge.s1 p7, p8 = POW_wre_urm_f8, POW_big_pos
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.le.s1 p7, p0 = POW_wre_urm_f8, POW_big_neg
+ nop.i 999
+}
+;;
+
+{ .mbb
+(p7) mov pow_GR_tag = 30
+(p7) br.cond.spnt __libm_error_region // Branch if overflow
+ br.ret.sptk b0 // Exit if did not overflow
+}
+;;
+
+
+POW_POSSIBLE_UNDER:
+// We got an answer. input was < -2^9 but > -2^10 (double)
+// We got an answer. input was < -2^6 but > -2^7 (float)
+// underflow is a possibility, not a certainty
+
+// We define an underflow when the answer with
+// ftz set
+// is zero (tiny numbers become zero)
+// Notice (from below) that if we have an unlimited exponent range,
+// then there is an extra machine number E between the largest denormal and
+// the smallest normal.
+// So if with unbounded exponent we round to E or below, then we are
+// tiny and underflow has occurred.
+// But notice that you can be in a situation where we are tiny, namely
+// rounded to E, but when the exponent is bounded we round to smallest
+// normal. So the answer can be the smallest normal with underflow.
+// E
+// -----+--------------------+--------------------+-----
+// | | |
+// 1.1...10 2^-3fff 1.1...11 2^-3fff 1.0...00 2^-3ffe
+// 0.1...11 2^-3ffe (biased, 1)
+// largest dn smallest normal
+
+// Form small constant (2^-170) to correct underflow result near region of
+// smallest denormal in round-nearest.
+
+// Put in s2 (td set, ftz set)
+.pred.rel "mutex",p12,p13
+{ .mfi
+ mov pow_GR_Fpsr = ar40 // Read the fpsr--need to check rc.s0
+ fsetc.s2 0x7F,0x41
+ mov pow_GR_rcs0_mask = 0x0c00 // Set mask for rc.s0
+}
+{ .mfi
+(p12) mov pow_GR_tmp = 0x2ffff - 170
+ nop.f 999
+(p13) mov pow_GR_tmp = 0x0ffff - 170
+}
+;;
+
+{ .mfi
+ setf.exp POW_eps = pow_GR_tmp // Form 2^-170
+ fma.s.s2 POW_ftz_urm_f8 = POW_A, POW_es, f0
+ nop.i 999
+}
+;;
+
+// Return s2 to default
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40
+ nop.i 999
+}
+;;
+
+// p7 = TRUE ==> yes, we have an underflow
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p7, p0 = POW_ftz_urm_f8, f0
+ nop.i 999
+}
+;;
+
+{ .mmi
+(p7) and pow_GR_rcs0 = pow_GR_rcs0_mask, pow_GR_Fpsr // Isolate rc.s0
+;;
+(p7) cmp.eq.unc p6,p0 = pow_GR_rcs0, r0 // Test for round to nearest
+ nop.i 999
+}
+;;
+
+// Tweak result slightly if underflow to get correct rounding near smallest
+// denormal if round-nearest
+{ .mfi
+ nop.m 999
+(p6) fms.s.s0 f8 = POW_A, POW_es, POW_eps
+ nop.i 999
+}
+{ .mbb
+(p7) mov pow_GR_tag = 31
+(p7) br.cond.spnt __libm_error_region // Branch if underflow
+ br.ret.sptk b0 // Exit if did not underflow
+}
+;;
+
+POW_X_DENORM:
+// Here if x unorm. Use the NORM_X for getf instructions, and then back
+// to normal path
+{ .mfi
+ getf.exp pow_GR_signexp_X = POW_NORM_X
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mib
+ getf.sig pow_GR_sig_X = POW_NORM_X
+ nop.i 999
+ br.cond.sptk POW_COMMON
+}
+;;
+
+POW_X_0:
+// Here if x=0 and y not nan
+//
+// We have the following cases:
+// p6 x=0 and y>0 and is an integer (may be even or odd)
+// p7 x=0 and y>0 and is NOT an integer, return +0
+// p8 x=0 and y>0 and so big as to always be an even integer, return +0
+// p9 x=0 and y>0 and may not be integer
+// p10 x=0 and y>0 and is an odd integer, return x
+// p11 x=0 and y>0 and is an even integer, return +0
+// p12 used in dummy fcmp to set denormal flag if y=unorm
+// p13 x=0 and y>0
+// p14 x=0 and y=0, branch to code for calling error handling
+// p15 x=0 and y<0, branch to code for calling error handling
+//
+{ .mfi
+ getf.sig pow_GR_sig_int_Y = POW_int_Y // Get signif of int_Y
+ fcmp.lt.s1 p15,p13 = f9, f0 // Test for y<0
+ and pow_GR_exp_Y = pow_GR_signexp_Y, pow_GR_17ones
+}
+{ .mfb
+ cmp.ne p14,p0 = pow_GR_y_zero,r0 // Test for y=0
+ fcvt.xf POW_float_int_Y = POW_int_Y
+(p14) br.cond.spnt POW_X_0_Y_0 // Branch if x=0 and y=0
+}
+;;
+
+// If x=0 and y>0, test y and flag denormal
+{ .mfb
+(p13) cmp.gt.unc p8,p9 = pow_GR_exp_Y, pow_GR_10033 // Test y +big = even int
+(p13) fcmp.eq.s0 p12,p0 = f9,f0 // If x=0, y>0 dummy op to flag denormal
+(p15) br.cond.spnt POW_X_0_Y_NEG // Branch if x=0 and y<0
+}
+;;
+
+// Here if x=0 and y>0
+{ .mfi
+ nop.m 999
+(p9) fcmp.eq.unc.s1 p6,p7 = POW_float_int_Y, POW_NORM_Y // Test y=int
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s.s0 f8 = f0,f0,f0 // If x=0, y>0 and large even int, return +0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p7) fma.s.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y>0 and not integer
+(p6) tbit.nz.unc p10,p11 = pow_GR_sig_int_Y,0 // If y>0 int, test y even/odd
+}
+;;
+
+// Note if x=0, y>0 and odd integer, just return x
+{ .mfb
+ nop.m 999
+(p11) fma.s.s0 f8 = f0,f0,f0 // Result +0 if x=0 and y even integer
+ br.ret.sptk b0 // Exit if x=0 and y>0
+}
+;;
+
+POW_X_0_Y_0:
+// When X is +-0 and Y is +-0, IEEE returns 1.0
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 32
+ fma.s.s0 f8 = f1,f1,f0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+POW_X_0_Y_NEG:
+// When X is +-0 and Y is negative, IEEE returns
+// X Y answer
+// +0 -odd int +inf
+// -0 -odd int -inf
+
+// +0 !-odd int +inf
+// -0 !-odd int +inf
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// return (sign_of_x)inf
+// p12 even
+// return +inf
+// p10 == Y is not an integer
+// return +inf
+//
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+;;
+
+{ .mfi
+ mov pow_GR_tag = 33
+(p7) fcmp.eq.unc.s1 p9,p10 = POW_float_int_Y, POW_NORM_Y
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p6) frcpa.s0 f8,p13 = f1, f0
+(p6) br.cond.sptk __libm_error_region // x=0, y<0, y large neg int
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p10) frcpa.s0 f8,p13 = f1, f0
+(p10) br.cond.sptk __libm_error_region // x=0, y<0, y not int
+}
+;;
+
+// x=0, y<0, y an int
+{ .mib
+ nop.m 999
+(p9) tbit.nz.unc p11,p12 = pow_GR_sig_int_Y,0
+ nop.b 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) frcpa.s0 f8,p13 = f1,f0
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) frcpa.s0 f8,p13 = f1,f8
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+POW_Y_0:
+// Here for y zero, x anything but zero and nan
+// Set flag if x denormal
+// Result is +1.0
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Sets flag if x denormal
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fma.s.s0 f8 = f1,f1,f0
+ br.ret.sptk b0
+}
+;;
+
+
+POW_X_INF:
+// Here when X is +-inf
+
+// X +inf Y +inf +inf
+// X -inf Y +inf +inf
+
+// X +inf Y >0 +inf
+// X -inf Y >0, !odd integer +inf <== (-inf)^0.5 = +inf !!
+// X -inf Y >0, odd integer -inf
+
+// X +inf Y -inf +0
+// X -inf Y -inf +0
+
+// X +inf Y <0 +0
+// X -inf Y <0, !odd integer +0
+// X -inf Y <0, odd integer -0
+
+// X + inf Y=+0 +1
+// X + inf Y=-0 +1
+// X - inf Y=+0 +1
+// X - inf Y=-0 +1
+
+// p13 == Y negative
+// p14 == Y positive
+
+// p6 == Y is a floating point number outside the integer range.
+// Hence it is an integer and is even.
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+
+// p7 == Y is a floating point number within the integer range.
+// p9 == (int_Y = NORM_Y), Y is an integer, which may be odd or even.
+// p11 odd
+// p13 == (Y negative)
+// return (sign_of_x)0
+// p14 == (Y positive)
+// return (sign_of_x)inf
+// pxx even
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+
+// pxx == Y is not an integer
+// p13 == (Y negative)
+// return +0
+// p14 == (Y positive)
+// return +inf
+//
+
+// If x=inf, test y and flag denormal
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p10,p11 = f9,f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.lt.s0 p13,p14 = POW_NORM_Y,f0
+ cmp.gt p6,p7 = pow_GR_exp_Y, pow_GR_10033
+}
+{ .mfi
+ nop.m 999
+ fclass.m p12,p0 = f9, 0x23 //@inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p15,p0 = f9, 0x07 //@zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p15) fmerge.s f8 = f1,f1 // Return +1.0 if x=inf, y=0
+(p15) br.ret.spnt b0 // Exit if x=inf, y=0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) frcpa.s1 f8,p10 = f1,f0 // If x=inf, y>0, assume result +inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p13) fma.s.s0 f8 = f0,f0,f0 // If x=inf, y<0, assume result +0.0
+(p12) br.ret.spnt b0 // Exit if x=inf, y=inf
+}
+;;
+
+// Here if x=inf, and 0 < |y| < inf. Need to correct results if y odd integer.
+{ .mfi
+ nop.m 999
+(p7) fcmp.eq.unc.s1 p9,p0 = POW_float_int_Y, POW_NORM_Y // Is y integer?
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ nop.f 999
+(p9) tbit.nz.unc p11,p0 = pow_GR_sig_int_Y,0 // Test for y odd integer
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fmerge.s f8 = POW_NORM_X,f8 // If y odd integer use sign of x
+ br.ret.sptk b0 // Exit for x=inf, 0 < |y| < inf
+}
+;;
+
+
+POW_X_NEG_Y_NONINT:
+// When X is negative and Y is a non-integer, IEEE
+// returns a qnan indefinite.
+// We call error support with this value
+
+{ .mfb
+ mov pow_GR_tag = 34
+ frcpa.s0 f8,p6 = f0,f0
+ br.cond.sptk __libm_error_region
+}
+;;
+
+POW_X_NAN:
+// Here if x=nan, y not nan
+{ .mfi
+ nop.m 999
+ fclass.m p9,p13 = f9, 0x07 // Test y=zero
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p13) fma.s.s0 f8 = f8,f1,f0
+(p13) br.ret.sptk b0 // Exit if x nan, y anything but zero or nan
+}
+;;
+
+POW_X_NAN_Y_0:
+// When X is a NAN and Y is zero, IEEE returns 1.
+// We call error support with this value.
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p6,p0 = f8,f0 // Dummy op to set invalid on snan
+ nop.i 999
+}
+{ .mfb
+ mov pow_GR_tag = 35
+ fma.s.s0 f8 = f0,f0,f1
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+POW_OVER_UNDER_X_NOT_INF:
+
+// p8 is TRUE for overflow
+// p9 is TRUE for underflow
+
+// if y is infinity, we should not over/underflow
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p14, p13 = POW_xsq,f1 // Test |x|=1
+ cmp.eq p8,p9 = pow_GR_sign_Y_Gpr, r0
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p14) fclass.m.unc p15, p0 = f9, 0x23 // If |x|=1, test y=inf
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p13) fclass.m.unc p11,p0 = f9, 0x23 // If |x| not 1, test y=inf
+ nop.i 999
+}
+;;
+
+// p15 = TRUE if |x|=1, y=inf, return +1
+{ .mfb
+ nop.m 999
+(p15) fma.s.s0 f8 = f1,f1,f0 // If |x|=1, y=inf, result +1
+(p15) br.ret.spnt b0 // Exit if |x|=1, y=inf
+}
+;;
+
+.pred.rel "mutex",p8,p9
+{ .mfb
+(p8) setf.exp f8 = pow_GR_17ones // If exp(+big), result inf
+(p9) fmerge.s f8 = f0,f0 // If exp(-big), result 0
+(p11) br.ret.sptk b0 // Exit if |x| not 1, y=inf
+}
+;;
+
+{ .mfb
+ nop.m 999
+ nop.f 999
+ br.cond.sptk POW_OVER_UNDER_ERROR // Branch if y not inf
+}
+;;
+
+
+POW_Y_NAN:
+// Here if y=nan, x anything
+// If x = +1 then result is +1, else result is quiet Y
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p10,p9 = POW_NORM_X, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fcmp.eq.s0 p6,p0 = f9,f1 // Set invalid, even if x=+1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fma.s.s0 f8 = f1,f1,f0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fma.s.s0 f8 = f9,f8,f0
+ br.ret.sptk b0 // Exit y=nan
+}
+;;
+
+
+POW_OVER_UNDER_ERROR:
+// Here if we have overflow or underflow.
+// Enter with p12 true if x negative and y odd int to force -0 or -inf
+
+{ .mfi
+ sub pow_GR_17ones_m1 = pow_GR_17ones, r0, 1
+ nop.f 999
+ mov pow_GR_one = 0x1
+}
+;;
+
+// overflow, force inf with O flag
+{ .mmb
+(p8) mov pow_GR_tag = 30
+(p8) setf.exp POW_tmp = pow_GR_17ones_m1
+ nop.b 999
+}
+;;
+
+// underflow, force zero with I, U flags
+{ .mmi
+(p9) mov pow_GR_tag = 31
+(p9) setf.exp POW_tmp = pow_GR_one
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s.s0 f8 = POW_tmp, POW_tmp, f0
+ nop.i 999
+}
+;;
+
+// p12 x is negative and y is an odd integer, change sign of result
+{ .mfi
+ nop.m 999
+(p12) fnma.s.s0 f8 = POW_tmp, POW_tmp, f0
+ nop.i 999
+}
+;;
+
+GLOBAL_IEEE754_END(powf)
+libm_alias_float_other (__pow, pow)
+#ifdef SHARED
+.symver powf,powf@@GLIBC_2.27
+.weak __powf_compat
+.set __powf_compat,__powf
+.symver __powf_compat,powf@GLIBC_2.2
+#endif
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfs [GR_Parameter_Y] = POW_NORM_Y,16 // STORE Parameter 2 on stack
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfs [GR_Parameter_X] = POW_NORM_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Parameter 3 (result) address
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_powf_log2_data.c b/sysdeps/ia64/fpu/e_powf_log2_data.c
new file mode 100644
index 0000000000..1cc8931700
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_powf_log2_data.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_powl.S b/sysdeps/ia64/fpu/e_powl.S
new file mode 100644
index 0000000000..e4ae4e3e15
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_powl.S
@@ -0,0 +1,2810 @@
+.file "powl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+//
+// Function: powl(x,y), where
+//
+// powl(x,y) = x^y, for double extended precision x and y values
+//
+//*********************************************************************
+//
+// History:
+// 02/02/00 (Hand Optimized)
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 01/22/01 Corrected results for powl(1,inf), powl(1,nan), and
+// powl(snan,0) to be 1 per C99, not nan. Fixed many flag settings.
+// 02/06/01 Call __libm_error support if over/underflow when y=2.
+// 04/17/01 Support added for y close to 1 and x a non-special value.
+// Shared software under/overflow detection for all paths
+// 02/07/02 Corrected sf3 setting to disable traps
+// 05/13/02 Improved performance of all paths
+// 02/10/03 Reordered header: .section, .global, .proc, .align;
+// used data8 for long double table values
+// 04/17/03 Added missing mutex directive
+// 10/13/03 Corrected .endp names to match .proc names
+//
+//*********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers:
+// f8 (Input x and Return Value)
+// f9 (Input y)
+// f10-f15,f32-f79
+//
+// General Purpose Registers:
+// Locals r14-24,r32-r65
+// Parameters to __libm_error_support r62,r63,r64,r65
+//
+// Predicate Registers: p6-p15
+//
+//*********************************************************************
+//
+// Special Cases and IEEE special conditions:
+//
+// Denormal fault raised on denormal inputs
+// Overflow exceptions raised when appropriate for pow
+// Underflow exceptions raised when appropriate for pow
+// (Error Handling Routine called for overflow and Underflow)
+// Inexact raised when appropriate by algorithm
+//
+// 1. (anything) ** NatVal or (NatVal) ** anything is NatVal
+// 2. X or Y unsupported or sNaN is qNaN/Invalid
+// 3. (anything) ** 0 is 1
+// 4. (anything) ** 1 is itself
+// 5. (anything except 1) ** qNAN is qNAN
+// 6. qNAN ** (anything except 0) is qNAN
+// 7. +-(|x| > 1) ** +INF is +INF
+// 8. +-(|x| > 1) ** -INF is +0
+// 9. +-(|x| < 1) ** +INF is +0
+// 10. +-(|x| < 1) ** -INF is +INF
+// 11. +-1 ** +-INF is +1
+// 12. +0 ** (+anything except 0, NAN) is +0
+// 13. -0 ** (+anything except 0, NAN, odd integer) is +0
+// 14. +0 ** (-anything except 0, NAN) is +INF/div_0
+// 15. -0 ** (-anything except 0, NAN, odd integer) is +INF/div_0
+// 16. -0 ** (odd integer) = -( +0 ** (odd integer) )
+// 17. +INF ** (+anything except 0,NAN) is +INF
+// 18. +INF ** (-anything except 0,NAN) is +0
+// 19. -INF ** (anything except NAN) = -0 ** (-anything)
+// 20. (-anything) ** (integer) is (-1)**(integer)*(+anything**integer)
+// 21. (-anything except 0 and inf) ** (non-integer) is qNAN/Invalid
+// 22. X or Y denorm/unorm and denorm/unorm operand trap is enabled,
+// generate denorm/unorm fault except if invalid or div_0 raised.
+//
+//*********************************************************************
+//
+// Algorithm
+// =========
+//
+// Special Cases
+//
+// If Y = 2, return X*X.
+// If Y = 0.5, return sqrt(X).
+//
+// Compute log(X) to extra precision.
+//
+// ker_log_80( X, logX_hi, logX_lo, Safe );
+//
+// ...logX_hi + logX_lo approximates log(X) to roughly 80
+// ...significant bits of accuracy.
+//
+// Compute Y*log(X) to extra precision.
+//
+// P_hi := Y * logX_hi
+// P_lo := Y * logX_hi - P_hi ...using FMA
+// P_lo := Y * logX_lo + P_lo ...using FMA
+//
+// Compute exp(P_hi + P_lo)
+//
+// Flag := 2;
+// Expo_Range := 2; (assuming double-extended power function)
+// ker_exp_64( P_hi, P_lo, Flag, Expo_Range,
+// Z_hi, Z_lo, scale, Safe )
+//
+// scale := sgn * scale
+//
+// If (Safe) then ...result will not over/underflow
+// return scale*Z_hi + (scale*Z_lo)
+// quickly
+// Else
+// take necessary precaution in computing
+// scale*Z_hi + (scale*Z_lo)
+// to set possible exceptions correctly.
+// End If
+//
+// Case_Y_Special
+//
+// ...Follow the order of the case checks
+//
+// If Y is +-0, return +1 without raising any exception.
+// If Y is +1, return X without raising any exception.
+// If Y is qNaN, return Y without exception.
+// If X is qNaN, return X without exception.
+//
+// At this point, X is real and Y is +-inf.
+// Thus |X| can only be 1, strictly bigger than 1, or
+// strictly less than 1.
+//
+// If |X| < 1, then
+// return ( Y == +inf? +0 : +inf )
+// elseif |X| > 1, then
+// return ( Y == +inf? +inf : +0 )
+// else
+// return +1 ...|X| = 1; see special case 11 and the 01/22/01 history entry
+//
+// Case_X_Special
+//
+// ...Follow the order of the case checks
+// ...Note that Y is real, finite, non-zero, and not +1.
+//
+// If X is qNaN, return X without exception.
+//
+// If X is +-0,
+// return ( Y > 0 ? +0 : +inf )
+//
+// If X is +inf
+// return ( Y > 0 ? +inf : +0 )
+//
+// If X is -inf
+// return -0 ** -Y
+// return ( Y > 0 ? +inf : +0 )
+//
+// Case_Invalid
+//
+// Return 0 * inf to generate a quiet NaN together
+// with an invalid exception.
+//
+// Implementation
+// ==============
+//
+// We describe the quick branch since this part is important
+// in reaching the normal case efficiently.
+//
+// STAGE 1
+// -------
+// This stage contains two threads.
+//
+// Stage1.Thread1
+//
+// fclass.m X_excep, X_ok = X, (NatVal or s/qNaN) or
+// +-0, +-infinity
+//
+// fclass.nm X_unsupp, X_supp = X, (NatVal or s/qNaN) or
+// +-(0, unnorm, norm, infinity)
+//
+// X_norm := fnorm( X ) with traps disabled
+//
+// If (X_excep) goto Filtering (Step 2)
+// If (X_unsupp) goto Filtering (Step 2)
+//
+// Stage1.Thread2
+// ..............
+//
+// fclass.m Y_excep, Y_ok = Y, (NatVal or s/qNaN) or
+// +-0, +-infinity
+//
+// fclass.nm Y_unsupp, Y_supp = Y, (NatVal or s/qNaN) or
+// +-(0, unnorm, norm, infinity)
+//
+// Y_norm := fnorm( Y ) with traps disabled
+//
+// If (Y_excep) goto Filtering (Step 2)
+// If (Y_unsupp) goto Filtering (Step 2)
+//
+//
+// STAGE 2
+// -------
+// This stage contains two threads.
+//
+// Stage2.Thread1
+// ..............
+//
+// Set X_lt_0 if X < 0 (using fcmp)
+// sgn := +1.0
+// If (X_lt_0) goto Filtering (Step 2)
+//
+// Stage2.Thread2
+// ..............
+//
+// Set Y_is_1 if Y = +1 (using fcmp)
+// If (Y_is_1) goto Filtering (Step 2)
+//
+// STAGE 3
+// -------
+// This stage contains two threads.
+//
+//
+// Stage3.Thread1
+// ..............
+//
+// X := fnorm(X) in prevailing traps
+//
+//
+// Stage3.Thread2
+// ..............
+//
+// Y := fnorm(Y) in prevailing traps
+//
+// STAGE 4
+// -------
+//
+// Go to Case_Normal.
+//
+
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 2^12/ln(2) is needed for the computation of N. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. RSHF_2TO51 = 1.1000..00 * 2^(63-12)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*2^12/ln2 into the rightmost bits of the significand.
+// The result of this fma is N_signif.
+// 2. RSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from N_signif * 2^(-51) to give
+// the integer part of N, N_fix, as a floating-point number.
+// The result of this fms is float_N.
+RODATA
+
+.align 16
+// L_hi, L_lo: log(2)/2^12 as a high part plus a low correction; this first object is the table base powl loads
+LOCAL_OBJECT_START(Constants_exp_64_Arg)
+data8 0xB17217F400000000,0x00003FF2 // L_hi = hi part log(2)/2^12
+data8 0xF473DE6AF278ECE6,0x00003FD4 // L_lo = lo part log(2)/2^12
+LOCAL_OBJECT_END(Constants_exp_64_Arg)
+
+LOCAL_OBJECT_START(Constants_exp_64_A)
+// Reversed: highest-order coefficient first (values ~= 1/24, 1/6, 1/2)
+data8 0xAAAAAAABB1B736A0,0x00003FFA
+data8 0xAAAAAAAB90CD6327,0x00003FFC
+data8 0xFFFFFFFFFFFFFFFF,0x00003FFD
+LOCAL_OBJECT_END(Constants_exp_64_A)
+
+LOCAL_OBJECT_START(Constants_exp_64_P)
+// Reversed: highest-order coefficient first (values ~= 1/7!, 1/6!, 1/5!, 1/4!, 1/3!, 1/2!)
+data8 0xD00D6C8143914A8A,0x00003FF2
+data8 0xB60BC4AC30304B30,0x00003FF5
+data8 0x888888887474C518,0x00003FF8
+data8 0xAAAAAAAA8DAE729D,0x00003FFA
+data8 0xAAAAAAAAAAAAAF61,0x00003FFC
+data8 0x80000000000004C7,0x00003FFE
+LOCAL_OBJECT_END(Constants_exp_64_P)
+
+LOCAL_OBJECT_START(Constants_exp_64_T1)
+data4 0x3F800000,0x3F8164D2,0x3F82CD87,0x3F843A29 // 64 IEEE singles; values ~= 2^(j/64), j = 0..63
+data4 0x3F85AAC3,0x3F871F62,0x3F88980F,0x3F8A14D5
+data4 0x3F8B95C2,0x3F8D1ADF,0x3F8EA43A,0x3F9031DC
+data4 0x3F91C3D3,0x3F935A2B,0x3F94F4F0,0x3F96942D
+data4 0x3F9837F0,0x3F99E046,0x3F9B8D3A,0x3F9D3EDA
+data4 0x3F9EF532,0x3FA0B051,0x3FA27043,0x3FA43516
+data4 0x3FA5FED7,0x3FA7CD94,0x3FA9A15B,0x3FAB7A3A
+data4 0x3FAD583F,0x3FAF3B79,0x3FB123F6,0x3FB311C4
+data4 0x3FB504F3,0x3FB6FD92,0x3FB8FBAF,0x3FBAFF5B
+data4 0x3FBD08A4,0x3FBF179A,0x3FC12C4D,0x3FC346CD
+data4 0x3FC5672A,0x3FC78D75,0x3FC9B9BE,0x3FCBEC15
+data4 0x3FCE248C,0x3FD06334,0x3FD2A81E,0x3FD4F35B
+data4 0x3FD744FD,0x3FD99D16,0x3FDBFBB8,0x3FDE60F5
+data4 0x3FE0CCDF,0x3FE33F89,0x3FE5B907,0x3FE8396A
+data4 0x3FEAC0C7,0x3FED4F30,0x3FEFE4BA,0x3FF28177
+data4 0x3FF5257D,0x3FF7D0DF,0x3FFA83B3,0x3FFD3E0C
+LOCAL_OBJECT_END(Constants_exp_64_T1)
+
+LOCAL_OBJECT_START(Constants_exp_64_T2)
+data4 0x3F800000,0x3F80058C,0x3F800B18,0x3F8010A4 // 64 IEEE singles; values ~= 2^(j/4096), j = 0..63
+data4 0x3F801630,0x3F801BBD,0x3F80214A,0x3F8026D7
+data4 0x3F802C64,0x3F8031F2,0x3F803780,0x3F803D0E
+data4 0x3F80429C,0x3F80482B,0x3F804DB9,0x3F805349
+data4 0x3F8058D8,0x3F805E67,0x3F8063F7,0x3F806987
+data4 0x3F806F17,0x3F8074A8,0x3F807A39,0x3F807FCA
+data4 0x3F80855B,0x3F808AEC,0x3F80907E,0x3F809610
+data4 0x3F809BA2,0x3F80A135,0x3F80A6C7,0x3F80AC5A
+data4 0x3F80B1ED,0x3F80B781,0x3F80BD14,0x3F80C2A8
+data4 0x3F80C83C,0x3F80CDD1,0x3F80D365,0x3F80D8FA
+data4 0x3F80DE8F,0x3F80E425,0x3F80E9BA,0x3F80EF50
+data4 0x3F80F4E6,0x3F80FA7C,0x3F810013,0x3F8105AA
+data4 0x3F810B41,0x3F8110D8,0x3F81166F,0x3F811C07
+data4 0x3F81219F,0x3F812737,0x3F812CD0,0x3F813269
+data4 0x3F813802,0x3F813D9B,0x3F814334,0x3F8148CE
+data4 0x3F814E68,0x3F815402,0x3F81599C,0x3F815F37
+LOCAL_OBJECT_END(Constants_exp_64_T2)
+
+LOCAL_OBJECT_START(Constants_exp_64_W1)
+data8 0x0000000000000000, 0xBE384454171EC4B4 // 64 IEEE doubles, all below 2^-24 in magnitude; presumably per-entry corrections to T1 (TODO confirm)
+data8 0xBE6947414AA72766, 0xBE5D32B6D42518F8
+data8 0x3E68D96D3A319149, 0xBE68F4DA62415F36
+data8 0xBE6DDA2FC9C86A3B, 0x3E6B2E50F49228FE
+data8 0xBE49C0C21188B886, 0x3E64BFC21A4C2F1F
+data8 0xBE6A2FBB2CB98B54, 0x3E5DC5DE9A55D329
+data8 0x3E69649039A7AACE, 0x3E54728B5C66DBA5
+data8 0xBE62B0DBBA1C7D7D, 0x3E576E0409F1AF5F
+data8 0x3E6125001A0DD6A1, 0xBE66A419795FBDEF
+data8 0xBE5CDE8CE1BD41FC, 0xBE621376EA54964F
+data8 0x3E6370BE476E76EE, 0x3E390D1A3427EB92
+data8 0x3E1336DE2BF82BF8, 0xBE5FF1CBD0F7BD9E
+data8 0xBE60A3550CEB09DD, 0xBE5CA37E0980F30D
+data8 0xBE5C541B4C082D25, 0xBE5BBECA3B467D29
+data8 0xBE400D8AB9D946C5, 0xBE5E2A0807ED374A
+data8 0xBE66CB28365C8B0A, 0x3E3AAD5BD3403BCA
+data8 0x3E526055C7EA21E0, 0xBE442C75E72880D6
+data8 0x3E58B2BB85222A43, 0xBE5AAB79522C42BF
+data8 0xBE605CB4469DC2BC, 0xBE589FA7A48C40DC
+data8 0xBE51C2141AA42614, 0xBE48D087C37293F4
+data8 0x3E367A1CA2D673E0, 0xBE51BEBB114F7A38
+data8 0xBE6348E5661A4B48, 0xBDF526431D3B9962
+data8 0x3E3A3B5E35A78A53, 0xBE46C46C1CECD788
+data8 0xBE60B7EC7857D689, 0xBE594D3DD14F1AD7
+data8 0xBE4F9C304C9A8F60, 0xBE52187302DFF9D2
+data8 0xBE5E4C8855E6D68F, 0xBE62140F667F3DC4
+data8 0xBE36961B3BF88747, 0x3E602861C96EC6AA
+data8 0xBE3B5151D57FD718, 0x3E561CD0FC4A627B
+data8 0xBE3A5217CA913FEA, 0x3E40A3CC9A5D193A
+data8 0xBE5AB71310A9C312, 0x3E4FDADBC5F57719
+data8 0x3E361428DBDF59D5, 0x3E5DB5DB61B4180D
+data8 0xBE42AD5F7408D856, 0x3E2A314831B2B707
+LOCAL_OBJECT_END(Constants_exp_64_W1)
+
+LOCAL_OBJECT_START(Constants_exp_64_W2)
+data8 0x0000000000000000, 0xBE641F2537A3D7A2 // 64 IEEE doubles, all below 2^-24 in magnitude; presumably per-entry corrections to T2 (TODO confirm)
+data8 0xBE68DD57AD028C40, 0xBE5C77D8F212B1B6
+data8 0x3E57878F1BA5B070, 0xBE55A36A2ECAE6FE
+data8 0xBE620608569DFA3B, 0xBE53B50EA6D300A3
+data8 0x3E5B5EF2223F8F2C, 0xBE56A0D9D6DE0DF4
+data8 0xBE64EEF3EAE28F51, 0xBE5E5AE2367EA80B
+data8 0x3E47CB1A5FCBC02D, 0xBE656BA09BDAFEB7
+data8 0x3E6E70C6805AFEE7, 0xBE6E0509A3415EBA
+data8 0xBE56856B49BFF529, 0x3E66DD3300508651
+data8 0x3E51165FC114BC13, 0x3E53333DC453290F
+data8 0x3E6A072B05539FDA, 0xBE47CD877C0A7696
+data8 0xBE668BF4EB05C6D9, 0xBE67C3E36AE86C93
+data8 0xBE533904D0B3E84B, 0x3E63E8D9556B53CE
+data8 0x3E212C8963A98DC8, 0xBE33138F032A7A22
+data8 0x3E530FA9BC584008, 0xBE6ADF82CCB93C97
+data8 0x3E5F91138370EA39, 0x3E5443A4FB6A05D8
+data8 0x3E63DACD181FEE7A, 0xBE62B29DF0F67DEC
+data8 0x3E65C4833DDE6307, 0x3E5BF030D40A24C1
+data8 0x3E658B8F14E437BE, 0xBE631C29ED98B6C7
+data8 0x3E6335D204CF7C71, 0x3E529EEDE954A79D
+data8 0x3E5D9257F64A2FB8, 0xBE6BED1B854ED06C
+data8 0x3E5096F6D71405CB, 0xBE3D4893ACB9FDF5
+data8 0xBDFEB15801B68349, 0x3E628D35C6A463B9
+data8 0xBE559725ADE45917, 0xBE68C29C042FC476
+data8 0xBE67593B01E511FA, 0xBE4A4313398801ED
+data8 0x3E699571DA7C3300, 0x3E5349BE08062A9E
+data8 0x3E5229C4755BB28E, 0x3E67E42677A1F80D
+data8 0xBE52B33F6B69C352, 0xBE6B3550084DA57F
+data8 0xBE6DB03FD1D09A20, 0xBE60CBC42161B2C1
+data8 0x3E56ED9C78A2B771, 0xBE508E319D0FA795
+data8 0xBE59482AFD1A54E9, 0xBE2A17CEB07FD23E
+data8 0x3E68BF5C17365712, 0x3E3956F9B3785569
+LOCAL_OBJECT_END(Constants_exp_64_W2)
+
+LOCAL_OBJECT_START(Constants_log_80_P)
+// P_8, P_7, ..., P_1 (values ~= (-1)^(k+1)/(k+2)); powl points at P_8 via base+0x6b0 and at P_4 via base+0x700
+data8 0xCCCE8B883B1042BC, 0x0000BFFB // P_8
+data8 0xE38997B7CADC2149, 0x00003FFB // P_7
+data8 0xFFFFFFFEB1ACB090, 0x0000BFFB // P_6
+data8 0x9249249806481C81, 0x00003FFC // P_5
+data8 0x0000000000000000, 0x00000000 // Pad for bank conflicts (keeps P_4 at base+0x700)
+data8 0xAAAAAAAAAAAAB0EF, 0x0000BFFC // P_4
+data8 0xCCCCCCCCCCC91416, 0x00003FFC // P_3
+data8 0x8000000000000000, 0x0000BFFD // P_2
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFD // P_1
+LOCAL_OBJECT_END(Constants_log_80_P)
+
+LOCAL_OBJECT_START(Constants_log_80_Q)
+// log2_hi, log2_lo, Q_6, Q_5, Q_4, Q_3, Q_2, Q_1; powl walks this from base+0x740 with ldfe ...,16
+data8 0xB172180000000000,0x00003FFE // log2_hi
+data8 0x82E308654361C4C6,0x0000BFE2 // log2_lo
+data8 0x92492453A51BE0AF,0x00003FFC // Q_6
+data8 0xAAAAAB73A0CFD29F,0x0000BFFC // Q_5
+data8 0xCCCCCCCCCCCE3872,0x00003FFC // Q_4
+data8 0xFFFFFFFFFFFFB4FB,0x0000BFFC // Q_3
+data8 0xAAAAAAAAAAAAAAAB,0x00003FFD // Q_2
+data8 0x8000000000000000,0x0000BFFE // Q_1
+LOCAL_OBJECT_END(Constants_log_80_Q)
+
+LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h1)
+// Z1 - 16 bit fixed, G1 and H1 IEEE single, h1 IEEE double extended (read with ldfe); 16 entries x 32 bytes at base+0x7c0
+data4 0x00008000,0x3F800000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007879,0x3F70F0F0,0x3D785196,0x00000000
+data4 0xEBA0E0D1,0x8B1D330B,0x00003FDA,0x00000000
+data4 0x000071C8,0x3F638E38,0x3DF13843,0x00000000
+data4 0x9EADD553,0xE2AF365E,0x00003FE2,0x00000000
+data4 0x00006BCB,0x3F579430,0x3E2FF9A0,0x00000000
+data4 0x752F34A2,0xF585FEC3,0x0000BFE3,0x00000000
+data4 0x00006667,0x3F4CCCC8,0x3E647FD6,0x00000000
+data4 0x893B03F3,0xF3546435,0x00003FE2,0x00000000
+data4 0x00006187,0x3F430C30,0x3E8B3AE7,0x00000000
+data4 0x39CDD2AC,0xBABA62E0,0x00003FE4,0x00000000
+data4 0x00005D18,0x3F3A2E88,0x3EA30C68,0x00000000
+data4 0x457978A1,0x8718789F,0x00003FE2,0x00000000
+data4 0x0000590C,0x3F321640,0x3EB9CEC8,0x00000000
+data4 0x3185E56A,0x9442DF96,0x0000BFE4,0x00000000
+data4 0x00005556,0x3F2AAAA8,0x3ECF9927,0x00000000
+data4 0x2BBE2CBD,0xCBF9A4BF,0x00003FE4,0x00000000
+data4 0x000051EC,0x3F23D708,0x3EE47FC5,0x00000000
+data4 0x852D5935,0xF3537535,0x00003FE3,0x00000000
+data4 0x00004EC5,0x3F1D89D8,0x3EF8947D,0x00000000
+data4 0x46CDF32F,0xA1F1E699,0x0000BFDF,0x00000000
+data4 0x00004BDB,0x3F17B420,0x3F05F3A1,0x00000000
+data4 0xD8484CE3,0x84A61856,0x00003FE4,0x00000000
+data4 0x00004925,0x3F124920,0x3F0F4303,0x00000000
+data4 0xFF28821B,0xC7DD97E0,0x0000BFE2,0x00000000
+data4 0x0000469F,0x3F0D3DC8,0x3F183EBF,0x00000000
+data4 0xEF1FD32F,0xD3C4A887,0x00003FE3,0x00000000
+data4 0x00004445,0x3F088888,0x3F20EC80,0x00000000
+data4 0x464C76DA,0x84672BE6,0x00003FE5,0x00000000
+data4 0x00004211,0x3F042108,0x3F29516A,0x00000000
+data4 0x18835FB9,0x9A43A511,0x0000BFE5,0x00000000
+LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h1)
+
+LOCAL_OBJECT_START(Constants_log_80_Z_G_H_h2)
+// Z2 - 16 bit fixed, G2 and H2 IEEE single, h2 IEEE double extended (read with ldfe); 16 entries x 32 bytes at base+0x9c0
+data4 0x00008000,0x3F800000,0x00000000,0x00000000
+data4 0x00000000,0x00000000,0x00000000,0x00000000
+data4 0x00007F81,0x3F7F00F8,0x3B7F875D,0x00000000
+data4 0x211398BF,0xAD08B116,0x00003FDB,0x00000000
+data4 0x00007F02,0x3F7E03F8,0x3BFF015B,0x00000000
+data4 0xC376958E,0xB106790F,0x00003FDE,0x00000000
+data4 0x00007E85,0x3F7D08E0,0x3C3EE393,0x00000000
+data4 0x79A7679A,0xFD03F242,0x0000BFDA,0x00000000
+data4 0x00007E08,0x3F7C0FC0,0x3C7E0586,0x00000000
+data4 0x05E7AE08,0xF03F81C3,0x0000BFDF,0x00000000
+data4 0x00007D8D,0x3F7B1880,0x3C9E75D2,0x00000000
+data4 0x049EB22F,0xD1B87D3C,0x00003FDE,0x00000000
+data4 0x00007D12,0x3F7A2328,0x3CBDC97A,0x00000000
+data4 0x3A9E81E0,0xFABC8B95,0x00003FDF,0x00000000
+data4 0x00007C98,0x3F792FB0,0x3CDCFE47,0x00000000
+data4 0x7C4B5443,0xF5F3653F,0x00003FDF,0x00000000
+data4 0x00007C20,0x3F783E08,0x3CFC15D0,0x00000000
+data4 0xF65A1773,0xE78AB204,0x00003FE0,0x00000000
+data4 0x00007BA8,0x3F774E38,0x3D0D874D,0x00000000
+data4 0x7B8EF695,0xDB7CBFFF,0x0000BFE0,0x00000000
+data4 0x00007B31,0x3F766038,0x3D1CF49B,0x00000000
+data4 0xCF773FB3,0xC0241AEA,0x0000BFE0,0x00000000
+data4 0x00007ABB,0x3F757400,0x3D2C531D,0x00000000
+data4 0xC9539FDF,0xFC8F4D48,0x00003FE1,0x00000000
+data4 0x00007A45,0x3F748988,0x3D3BA322,0x00000000
+data4 0x954665C2,0x9CD035FB,0x0000BFE1,0x00000000
+data4 0x000079D1,0x3F73A0D0,0x3D4AE46F,0x00000000
+data4 0xDD367A30,0xEC9017C7,0x00003FE1,0x00000000
+data4 0x0000795D,0x3F72B9D0,0x3D5A1756,0x00000000
+data4 0xCB11189C,0xEE6625D3,0x0000BFE1,0x00000000
+data4 0x000078EB,0x3F71D488,0x3D693B9D,0x00000000
+data4 0xBE11C424,0xA49C8DB5,0x0000BFE0,0x00000000
+LOCAL_OBJECT_END(Constants_log_80_Z_G_H_h2)
+
+LOCAL_OBJECT_START(Constants_log_80_h3_G_H)
+// 32 entries x 16 bytes: h3 IEEE double extended, then G3 IEEE single (first G3 at base+0xbcc); 32 H3 IEEE singles follow
+data4 0x112666B0,0xAAACAAB1,0x00003FD3,0x3F7FFC00
+data4 0x9B7FAD21,0x90051030,0x00003FD8,0x3F7FF400
+data4 0xF4D783C4,0xA6B46F46,0x00003FDA,0x3F7FEC00
+data4 0x11C6DDCA,0xDA148D88,0x0000BFD8,0x3F7FE400
+data4 0xCA964D95,0xCE65C1D8,0x0000BFD8,0x3F7FDC00
+data4 0x23412D13,0x883838EE,0x0000BFDB,0x3F7FD400
+data4 0x983ED687,0xB7E5CFA1,0x00003FDB,0x3F7FCC08
+data4 0xE3C3930B,0xDBE23B16,0x0000BFD9,0x3F7FC408
+data4 0x48AA4DFC,0x9B92F1FC,0x0000BFDC,0x3F7FBC10
+data4 0xCE9C8F7E,0x9A8CEB15,0x0000BFD9,0x3F7FB410
+data4 0x0DECE74A,0x8C220879,0x00003FDC,0x3F7FAC18
+data4 0x2F053150,0xB25CA912,0x0000BFDA,0x3F7FA420
+data4 0xD9A5BE20,0xA5876555,0x00003FDB,0x3F7F9C20
+data4 0x2053F087,0xC919BB6E,0x00003FD9,0x3F7F9428
+data4 0x041E9A77,0xB70BDA79,0x00003FDC,0x3F7F8C30
+data4 0xEA1C9C30,0xF18A5C08,0x00003FDA,0x3F7F8438
+data4 0x796D89E5,0xA3790D84,0x0000BFDD,0x3F7F7C40
+data4 0xA2915A3A,0xE1852369,0x0000BFDD,0x3F7F7448
+data4 0xA39ED868,0xD803858F,0x00003FDC,0x3F7F6C50
+data4 0x9417EBB7,0xB2EEE356,0x0000BFDD,0x3F7F6458
+data4 0x9BB0D07F,0xED5C1F8A,0x0000BFDC,0x3F7F5C68
+data4 0xE87C740A,0xD6D201A0,0x0000BFDD,0x3F7F5470
+data4 0x1CA74025,0xE8DEBF5E,0x00003FDC,0x3F7F4C78
+data4 0x1F34A7EB,0x9A995A97,0x0000BFDC,0x3F7F4488
+data4 0x359EED97,0x9CB0F742,0x0000BFDA,0x3F7F3C90
+data4 0xBBC6A1C8,0xD6F833C2,0x0000BFDD,0x3F7F34A0
+data4 0xE71090EC,0xE1F68F2A,0x00003FDC,0x3F7F2CA8
+data4 0xC160A74F,0xD1881CF1,0x0000BFDB,0x3F7F24B8
+data4 0xD78CB5A4,0x9AD05AE2,0x00003FD6,0x3F7F1CC8
+data4 0x9A77DC4B,0xE658CB8E,0x0000BFDD,0x3F7F14D8
+data4 0x6BD6D312,0xBA281296,0x00003FDC,0x3F7F0CE0
+data4 0xF95210D0,0xB478BBEB,0x0000BFDB,0x3F7F04F0
+data4 0x38800100,0x39400480,0x39A00640,0x39E00C41 // H3 array starts here (base+0xdc0), indexed by Index3 x 4 bytes
+data4 0x3A100A21,0x3A300F22,0x3A4FF51C,0x3A6FFC1D
+data4 0x3A87F20B,0x3A97F68B,0x3AA7EB86,0x3AB7E101
+data4 0x3AC7E701,0x3AD7DD7B,0x3AE7D474,0x3AF7CBED
+data4 0x3B03E1F3,0x3B0BDE2F,0x3B13DAAA,0x3B1BD766
+data4 0x3B23CC5C,0x3B2BC997,0x3B33C711,0x3B3BBCC6
+data4 0x3B43BAC0,0x3B4BB0F4,0x3B53AF6D,0x3B5BA620
+data4 0x3B639D12,0x3B6B9444,0x3B7393BC,0x3B7B8B6D
+LOCAL_OBJECT_END(Constants_log_80_h3_G_H)
+
+GR_sig_inv_ln2 = r14
+GR_rshf_2to51 = r15
+GR_exp_2tom51 = r16
+GR_rshf = r17
+GR_exp_half = r18
+GR_sign_mask = r19
+GR_exp_square_oflow = r20
+GR_exp_square_uflow = r21
+GR_exp_ynear1_oflow = r22
+GR_exp_ynear1_uflow = r23
+GR_signif_Z = r24
+// r32-r61 below are the 30 locals and r62-r65 the 4 outputs of powl's "alloc r32 = ar.pfs,0,30,4,0"
+GR_signexp_x = r32
+
+GR_exp_x = r33
+
+GR_Table_Ptr = r34
+
+GR_Table_Ptr1 = r35
+
+GR_Index1 = r36
+
+GR_Index2 = r37
+GR_Expo_X = r37
+
+GR_M = r38
+
+GR_X_0 = r39
+GR_Mask = r39
+
+GR_X_1 = r40
+GR_W1_ptr = r40
+
+GR_W2_ptr = r41
+GR_X_2 = r41
+
+GR_Z_1 = r42
+GR_M2 = r42
+
+GR_M1 = r43
+GR_Z_2 = r43
+
+GR_N = r44
+GR_k = r44
+
+GR_Big_Pos_Exp = r45
+
+GR_exp_pos_max = r46
+
+GR_exp_bias_p_k = r47
+
+GR_Index3 = r48
+GR_temp = r48
+
+GR_vsm_expo = r49
+
+GR_T1_ptr = r50
+GR_P_ptr1 = r50
+GR_T2_ptr = r51
+GR_P_ptr2 = r51
+GR_N_fix = r52
+GR_exp_y = r53
+GR_signif_y = r54
+GR_signexp_y = r55
+GR_fraction_y = r55
+GR_low_order_bit = r56
+GR_exp_mask = r57
+GR_exp_bias = r58
+GR_y_sign = r59
+GR_table_base = r60
+GR_ptr_exp_Arg = r61
+GR_Delta_Exp = r62 // r62-r65 are also the GR_Parameter_* registers defined below
+GR_Special_Exp = r63
+GR_exp_neg_max = r64
+GR_Big_Neg_Exp = r65
+
+//** Registers for unwind support
+// Save slots plus the __libm_error_support parameters (r62-r65); each reuses a register already named above.
+GR_SAVE_PFS = r59 // shares r59 with GR_y_sign
+GR_SAVE_B0 = r60 // shares r60 with GR_table_base
+GR_SAVE_GP = r61 // shares r61 with GR_ptr_exp_Arg
+GR_Parameter_X = r62 // shares r62 with GR_Delta_Exp
+GR_Parameter_Y = r63 // shares r63 with GR_Special_Exp
+GR_Parameter_RESULT = r64 // shares r64 with GR_exp_neg_max
+GR_Parameter_TAG = r65 // shares r65 with GR_Big_Neg_Exp
+
+//**
+
+FR_Input_X = f8 // f8: input x and return value
+FR_Result = f8
+FR_Input_Y = f9 // f9: input y
+
+FR_Neg = f10
+FR_P_hi = f10
+FR_X = f10
+
+FR_Half = f11
+FR_h_3 = f11
+FR_poly_hi = f11
+
+FR_Sgn = f12
+
+FR_half_W = f13
+
+FR_X_cor = f14
+FR_P_lo = f14
+
+FR_W = f15
+
+FR_X_lo = f32
+
+FR_S = f33
+FR_W3 = f33
+
+FR_Y_hi = f34
+FR_logx_hi = f34
+
+FR_Z = f35
+FR_logx_lo = f35
+FR_GS_hi = f35
+FR_Y_lo = f35
+
+FR_r_cor = f36
+FR_Scale = f36
+
+FR_G_1 = f37
+FR_G = f37
+FR_Wsq = f37
+FR_temp = f37
+
+FR_H_1 = f38
+FR_H = f38
+FR_W4 = f38
+
+FR_h = f39
+FR_h_1 = f39
+FR_N = f39
+FR_P_7 = f39
+
+FR_G_2 = f40
+FR_P_8 = f40 // NOTE(review): FR_P_8 is assigned again (f44) below; presumably the later value is the one in effect - confirm
+FR_L_hi = f40
+
+FR_H_2 = f41
+FR_L_lo = f41
+FR_A_1 = f41
+
+FR_h_2 = f42
+
+FR_W1 = f43
+
+FR_G_3 = f44
+FR_P_8 = f44 // NOTE(review): second assignment of FR_P_8 (f40 above) - confirm which register is intended
+FR_T1 = f44
+
+FR_log2_hi = f45
+FR_W2 = f45
+
+FR_GS_lo = f46
+FR_T2 = f46
+
+FR_W_1_p1 = f47
+FR_H_3 = f47
+
+FR_float_N = f48
+
+FR_A_2 = f49
+
+FR_Q_4 = f50
+FR_r4 = f50
+
+FR_Q_3 = f51
+FR_A_3 = f51
+
+FR_Q_2 = f52
+FR_P_2 = f52
+
+FR_Q_1 = f53
+FR_P_1 = f53
+FR_T = f53
+
+FR_Wp1 = f54
+FR_Q_5 = f54
+FR_P_3 = f54
+
+FR_Q_6 = f55
+
+FR_log2_lo = f56
+FR_Two = f56
+
+FR_Big = f57
+
+FR_neg_2_mK = f58
+
+FR_r = f59
+
+FR_poly_lo = f60
+
+FR_poly = f61
+
+FR_P_5 = f62
+FR_Result_small = f62
+
+FR_rsq = f63
+
+FR_Delta = f64
+
+FR_save_Input_X = f65
+FR_norm_X = f66
+FR_norm_Y = f67
+FR_Y_lo_2 = f68
+
+FR_P_6 = f69
+FR_Result_big = f69
+
+FR_RSHF_2TO51 = f70
+FR_INV_LN2_2TO63 = f71
+FR_2TOM51 = f72
+FR_RSHF = f73
+FR_TMP1 = f74
+FR_TMP2 = f75
+FR_TMP3 = f76
+FR_Tscale = f77
+FR_P_4 = f78
+FR_NBig = f79
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(powl)
+//
+// Get significand of x. It is the critical path.
+//
+{ .mfi
+ getf.sig GR_signif_Z = FR_Input_X // Get significand of x
+ fclass.m p11, p12 = FR_Input_X, 0x0b // Test x unorm
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fnorm.s1 FR_norm_X = FR_Input_X // Normalize x
+ mov GR_exp_half = 0xffff - 1 // Exponent for 0.5
+}
+;;
+
+{ .mfi
+ alloc r32 = ar.pfs,0,30,4,0
+ fclass.m p7, p0 = FR_Input_Y, 0x1E7 // Test y natval, nan, inf, zero
+ mov GR_exp_pos_max = 0x13fff // Max exponent for pos oflow test
+}
+{ .mfi
+ addl GR_table_base = @ltoff(Constants_exp_64_Arg#), gp // Ptr to tables
+ fnorm.s1 FR_norm_Y = FR_Input_Y // Normalize y
+ mov GR_exp_neg_max = 0x33fff // Max exponent for neg oflow test
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_y = FR_Input_Y // Get sign and exp of y
+(p12) fclass.m p11, p0 = FR_Input_Y, 0x0b // Test y unorm
+ mov GR_sign_mask = 0x20000 // Sign mask
+}
+{ .mfi
+ ld8 GR_table_base = [GR_table_base] // Get base address for tables
+ fadd.s1 FR_Two = f1, f1 // Form 2.0 for square test
+ mov GR_exp_mask = 0x1FFFF // Exponent mask
+}
+;;
+
+{ .mfi
+ getf.sig GR_signif_y = FR_Input_Y // Get significand of y
+ fclass.m p6, p0 = FR_Input_X, 0x1E7 // Test x natval, nan, inf, zero
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_x = FR_Input_X // Get signexp of x
+ fmerge.s FR_save_Input_X = FR_Input_X, FR_Input_X
+ extr.u GR_Index1 = GR_signif_Z, 59, 4 // Extract upper 4 signif bits of x
+}
+{ .mfb
+ setf.exp FR_Half = GR_exp_half // Load half
+ nop.f 999
+(p11) br.cond.spnt POWL_DENORM // Branch if x or y denorm/unorm
+}
+;;
+
+// Return here from POWL_DENORM
+POWL_COMMON:
+{ .mfi
+ setf.exp FR_Big = GR_exp_pos_max // Form big pos value for oflow test
+ fclass.nm p11, p0 = FR_Input_Y, 0x1FF // Test Y unsupported
+ shl GR_Index1 = GR_Index1,5 // Adjust index1 pointer x 32
+}
+{ .mfi
+ add GR_Table_Ptr = 0x7c0, GR_table_base // Constants_log_80_Z_G_H_h1
+ fma.s1 FR_Sgn = f1,f1,f0 // Assume result positive
+ mov GR_exp_bias = 0xFFFF // Form exponent bias
+}
+;;
+
+//
+// Identify NatVals, NaNs, Infs, and Zeros.
+//
+//
+// Remove sign bit from exponent of y.
+// Check for x = 1
+// Branch on Infs, Nans, Zeros, and Natvals
+// Check to see that exponent < 0
+//
+{ .mfi
+ setf.exp FR_NBig = GR_exp_neg_max // Form big neg value for oflow test
+ fclass.nm p8, p0 = FR_Input_X, 0x1FF // Test X unsupported
+ and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
+}
+{ .mfb
+ add GR_Index1 = GR_Index1,GR_Table_Ptr
+ nop.f 999
+(p6) br.cond.spnt POWL_64_SPECIAL // Branch if x natval, nan, inf, zero
+}
+;;
+
+// load Z_1 from Index1
+
+// There is logic starting here to determine if y is an integer when x < 0.
+// If 0 < |y| < 1 then clearly y is not an integer.
+// If |y| > 1, then the significand of y is shifted left by the size of
+// the exponent of y. This preserves the lsb of the integer part + the
+// fractional bits. The lsb of the integer can be tested to determine if
+// the integer is even or odd. The fractional bits can be tested. If zero,
+// then y is an integer.
+//
+{ .mfi
+ ld2 GR_Z_1 =[GR_Index1],4 // Load Z_1
+ fmerge.s FR_Z = f0, FR_norm_X // Z = |x|
+ extr.u GR_X_0 = GR_signif_Z, 49, 15 // Extract X_0 from significand
+}
+{ .mfb
+ cmp.lt p9, p0 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
+ nop.f 999
+(p7) br.cond.spnt POWL_64_SPECIAL // Branch if y natval, nan, inf, zero
+}
+;;
+
+{ .mfb
+ ldfs FR_G_1 = [GR_Index1],4 // Load G_1
+ fcmp.eq.s1 p10, p0 = FR_Input_Y, f1 // Test Y = +1.0
+(p8) br.cond.spnt POWL_64_UNSUPPORT // Branch if x unsupported
+}
+;;
+
+//
+// X_0 = High order 15 bit of Z
+//
+{ .mfb
+ ldfs FR_H_1 = [GR_Index1],8 // Load H_1
+(p9) fcmp.lt.unc.s1 p9, p0 = FR_Input_X, f0 // Test x<0, 0 <|y|<1
+(p11) br.cond.spnt POWL_64_UNSUPPORT // Branch if y unsupported
+}
+;;
+
+{ .mfi
+ ldfe FR_h_1 = [GR_Index1] // Load h_1
+ fcmp.eq.s1 p7, p0 = FR_Input_Y, FR_Two // Test y = 2.0
+ pmpyshr2.u GR_X_1 = GR_X_0,GR_Z_1,15 // X_1 = X_0 * Z_1 (bits 15-30)
+ // Wait 4 cycles to use result
+}
+{ .mfi
+ add GR_Table_Ptr = 0x9c0, GR_table_base // Constants_log_80_Z_G_H_h2
+ nop.f 999
+ sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent of y
+}
+;;
+
+//
+// Branch for (x < 0) and Y not an integer.
+//
+{ .mfb
+ nop.m 999
+ fcmp.lt.s1 p6, p0 = FR_Input_X, f0 // Test x < 0
+(p9) br.cond.spnt POWL_64_XNEG // Branch if x < 0, 0 < |y| < 1
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p12, p0 = FR_Input_X, f1 // Test x=+1.0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fsub.s1 FR_W = FR_Z, f1 // W = Z - 1
+(p7) br.cond.spnt POWL_64_SQUARE // Branch if y=2
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f1 // If y=+1.0, result=x
+(p6) shl GR_fraction_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
+ // Wait 4 cycles to use result
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p12) fma.s0 FR_Result = FR_Input_Y, f0, f1 // If x=1.0, result=1, chk denorm
+ extr.u GR_Index2 = GR_X_1, 6, 4 // Extract index2
+}
+;;
+
+//
+// N = exponent of Z
+//
+{ .mib
+ getf.exp GR_N = FR_Z // Get exponent of Z (also x)
+ shl GR_Index2=GR_Index2,5 // Index2 x 32 bytes
+(p10) br.ret.spnt b0 // Exit if y=+1.0
+}
+;;
+
+{ .mib
+ add GR_Index2 = GR_Index2, GR_Table_Ptr // Pointer to table 2
+ nop.i 999
+(p12) br.ret.spnt b0 // Exit if x=+1.0
+}
+;;
+
+{ .mmi
+ ld2 GR_Z_2 =[GR_Index2],4 // Load Z_2
+;;
+ ldfs FR_G_2 = [GR_Index2],4 // Load G_2
+ nop.i 999
+}
+;;
+
+{ .mii
+ ldfs FR_H_2 = [GR_Index2],8 // Load H_2
+(p6) tbit.nz.unc p9, p0 = GR_fraction_y, 63 // Test x<0 and y odd integer
+ add GR_Table_Ptr = 0xbcc, GR_table_base // Constants_log_80_h3_G_H, G_3
+}
+;;
+
+//
+// For x < 0 and y odd integer, set sign = -1.
+//
+{ .mfi
+ getf.exp GR_M = FR_W // Get signexp of W
+ nop.f 999
+ pmpyshr2.u GR_X_2 = GR_X_1,GR_Z_2,15 // X_2 = X_1 * Z_2 (bits 15-30)
+}
+{ .mfi
+ ldfe FR_h_2 = [GR_Index2] // Load h_2
+(p9) fnma.s1 FR_Sgn = f1, f1, f0 // If x<0, y odd int, result negative
+ sub GR_N = GR_N, GR_exp_bias // Get true exponent of x = N
+}
+;;
+
+{ .mfi
+ add GR_Table_Ptr1 = 0xdc0, GR_table_base // Ptr to H_3
+ fcmp.eq.s0 p11, p0 = FR_Input_Y, FR_Half // Test y=0.5, also set denorm
+(p6) shl GR_fraction_y= GR_fraction_y, 1 // Shift left 1 to get fraction
+}
+;;
+
+{ .mmb
+ setf.sig FR_float_N = GR_N
+(p6) cmp.ne.unc p8, p0 = GR_fraction_y, r0 // Test x<0 and y not integer
+(p8) br.cond.spnt POWL_64_XNEG // Branch if x<0 and y not int
+}
+;;
+
+//
+// Raise possible denormal operand exception for both X and Y.
+// Set pointers in case |x| near 1
+// Branch to embedded sqrt(x) if y=0.5
+//
+{ .mfi
+ add GR_P_ptr1 = 0x6b0, GR_table_base // Constants_log_80_P, P8, NEAR path
+ fcmp.eq.s0 p12, p0 = FR_Input_X, FR_Input_Y // Dummy to set denormal
+ add GR_P_ptr2 = 0x700, GR_table_base // Constants_log_80_P, P4, NEAR path
+}
+{ .mfb
+ cmp.eq p15, p14 = r0, r0 // Assume result safe (no over/under)
+ fsub.s1 FR_Delta = FR_Input_Y,f1 // Delta = y - 1.0
+(p11) br.cond.spnt POWL_64_SQRT // Branch if y=0.5
+}
+;;
+
+//
+// Computes ln( x ) to extra precision
+// Input FR 1: FR_X
+// Output FR 2: FR_Y_hi
+// Output FR 3: FR_Y_lo
+// Output PR 1: PR_Safe
+//
+{ .mfi
+ and GR_M = GR_exp_mask, GR_M // Mask to get exponent of W
+ nop.f 999
+ extr.u GR_Index3 = GR_X_2, 1, 5 // Get index3
+}
+;;
+
+{ .mmi
+ shladd GR_Table_Ptr1 = GR_Index3,2,GR_Table_Ptr1 // Ptr to H_3
+ shladd GR_Index3 = GR_Index3,4,GR_Table_Ptr // Ptr to G_3
+ sub GR_M = GR_M, GR_exp_bias // Get true exponent of W
+}
+;;
+
+{ .mib
+ ldfs FR_G_3 = [GR_Index3],-12 // Load G_3
+ cmp.gt p7, p14 = -8, GR_M // Test if |x-1| < 2^-8
+(p7) br.cond.spnt LOGL80_NEAR // Branch if |x-1| < 2^-8
+}
+;;
+
+// Here if |x-1| >= 2^-8
+{ .mmf
+ ldfs FR_H_3 = [GR_Table_Ptr1] // Load H_3
+ nop.m 999
+ nop.f 999
+}
+;;
+
+{ .mfi
+ ldfe FR_h_3 = [GR_Index3] // Load h_3
+ fmerge.se FR_S = f1,FR_Z // S = merge of 1.0 and signif(Z)
+ nop.i 999
+}
+{ .mfi
+ add GR_Table_Ptr = 0x740, GR_table_base // Constants_log_80_Q
+ fmpy.s1 FR_G = FR_G_1, FR_G_2 // G = G_1 * G_2
+ nop.i 999
+}
+;;
+
+//
+// Begin Loading Q's - load log2_hi part
+//
+{ .mfi
+ ldfe FR_log2_hi = [GR_Table_Ptr],16 // Load log2_hi
+ fadd.s1 FR_H = FR_H_1, FR_H_2 // H = H_1 + H_2
+ nop.i 999
+};;
+
+//
+// h = h_1 + h_2
+//
+{ .mfi
+ ldfe FR_log2_lo = [GR_Table_Ptr],16 // Load log2_lo
+ fadd.s1 FR_h = FR_h_1, FR_h_2 // h = h_1 + h_2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe FR_Q_6 = [GR_Table_Ptr],16 // Load Q_6
+ fcvt.xf FR_float_N = FR_float_N
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe FR_Q_5 = [GR_Table_Ptr],16 // Load Q_5
+ nop.f 999
+ nop.i 999
+}
+;;
+
+//
+// G = G_1 * G_2 * G_3
+//
+{ .mfi
+ ldfe FR_Q_4 = [GR_Table_Ptr],16 // Load Q_4
+ fmpy.s1 FR_G = FR_G, FR_G_3
+ nop.i 999
+}
+;;
+
+//
+// H = H_1 + H_2 + H_3
+//
+{ .mfi
+ ldfe FR_Q_3 = [GR_Table_Ptr],16 // Load Q_3
+ fadd.s1 FR_H = FR_H, FR_H_3
+ nop.i 999
+}
+;;
+
+//
+// Y_lo = poly + Y_lo
+//
+// h = h_1 + h_2 + h_3
+//
+{ .mfi
+ ldfe FR_Q_2 = [GR_Table_Ptr],16 // Load Q_2
+ fadd.s1 FR_h = FR_h, FR_h_3
+ nop.i 999
+}
+;;
+
+//
+// GS_hi = G*S
+// r = G*S -1
+//
+{ .mfi
+ ldfe FR_Q_1 = [GR_Table_Ptr],16 // Load Q_1
+ fmpy.s1 FR_GS_hi = FR_G, FR_S
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fms.s1 FR_r = FR_G, FR_S, f1
+ nop.i 999
+}
+;;
+
+//
+// poly_lo = Q_5 + r * Q_6
+//
+{ .mfi
+ getf.exp GR_Delta_Exp = FR_Delta // Get signexp of y-1 for exp calc
+ fma.s1 FR_poly_lo = FR_r, FR_Q_6, FR_Q_5
+ nop.i 999
+}
+//
+// r_cor = GS_hi -1
+//
+{ .mfi
+ nop.m 999
+ fsub.s1 FR_r_cor = FR_GS_hi, f1
+ nop.i 999
+}
+;;
+
+//
+// GS_lo = G*S - GS_hi
+//
+{ .mfi
+ nop.m 999
+ fms.s1 FR_GS_lo = FR_G, FR_S, FR_GS_hi
+ nop.i 999
+}
+;;
+
+//
+// rsq = r * r
+//
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999
+}
+//
+// G = float_N*log2_hi + H
+//
+{ .mfi
+ nop.m 999
+ fma.s1 FR_G = FR_float_N, FR_log2_hi, FR_H
+ nop.i 999
+}
+;;
+
+//
+// Y_lo = float_N*log2_lo + h
+//
+{ .mfi
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_float_N, FR_log2_lo, FR_h
+ nop.i 999
+}
+;;
+
+//
+// poly_lo = Q_4 + r * poly_lo
+// r_cor = r_cor - r
+//
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_4
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fsub.s1 FR_r_cor = FR_r_cor, FR_r
+ nop.i 999
+}
+;;
+
+//
+// poly_hi = r * Q_2 + Q_1
+// Y_hi = G + r
+//
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_r, FR_Q_2, FR_Q_1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fadd.s1 FR_Y_hi = FR_G, FR_r
+ nop.i 999
+}
+;;
+
+//
+// poly_lo = Q_3 + r * poly_lo
+// r_cor = r_cor + GS_lo
+//
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_r, FR_poly_lo, FR_Q_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fadd.s1 FR_r_cor = FR_r_cor, FR_GS_lo
+ nop.i 999
+}
+;;
+
+//
+// Y_lo = G - Y_hi
+//
+{ .mfi
+ nop.m 999
+ fsub.s1 FR_Y_lo_2 = FR_G, FR_Y_hi
+ nop.i 999
+}
+;;
+
+//
+// r_cor = r_cor + Y_lo
+// poly = poly_hi + rsq * poly_lo
+//
+{ .mfi
+ add GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
+ fadd.s1 FR_r_cor = FR_r_cor, FR_Y_lo
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly_lo, FR_poly
+ nop.i 999
+}
+;;
+
+//
+// Load L_hi
+// Load L_lo
+// all long before they are needed.
+// They are used in LOGL_RETURN PATH
+//
+// Y_lo = Y_lo + r
+// poly = rsq * poly + r_cor
+//
+{ .mfi
+ ldfe FR_L_hi = [GR_Table_Ptr],16 // Load L_hi
+ fadd.s1 FR_Y_lo = FR_Y_lo_2, FR_r
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly, FR_r_cor
+ nop.i 999
+}
+;;
+
+{ .mfb
+ ldfe FR_L_lo = [GR_Table_Ptr],16 // Load L_lo
+ fadd.s1 FR_Y_lo = FR_Y_lo, FR_poly
+ br.cond.sptk LOGL_RETURN // Branch to common code
+}
+;;
+
+
+LOGL80_NEAR:
+// Here if |x-1| < 2^-8
+//
+// Branch LOGL80_NEAR
+//
+
+{ .mmf
+ ldfe FR_P_8 = [GR_P_ptr1],16 // Load P_8
+ ldfe FR_P_4 = [GR_P_ptr2],16 // Load P_4
+ fmpy.s1 FR_Wsq = FR_W, FR_W
+}
+;;
+
+{ .mmi
+ ldfe FR_P_7 = [GR_P_ptr1],16 // Load P_7
+ ldfe FR_P_3 = [GR_P_ptr2],16 // Load P_3
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ldfe FR_P_6 = [GR_P_ptr1],16 // Load P_6
+ ldfe FR_P_2 = [GR_P_ptr2],16 // Load P_2
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ldfe FR_P_5 = [GR_P_ptr1],16 // Load P_5
+ ldfe FR_P_1 = [GR_P_ptr2],16 // Load P_1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.exp GR_Delta_Exp = FR_Delta // Get signexp of y-1 for exp calc
+ fmpy.s1 FR_W4 = FR_Wsq, FR_Wsq
+ nop.i 999
+}
+{ .mfi
+ add GR_Table_Ptr = 0x0, GR_table_base // Constants_exp_64_Arg
+ fmpy.s1 FR_W3 = FR_Wsq, FR_W
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_half_W = FR_Half, FR_W
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe FR_L_hi = [GR_Table_Ptr],16
+ fma.s1 FR_poly_lo = FR_W, FR_P_8,FR_P_7
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_W, FR_P_4, FR_P_3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe FR_L_lo = [GR_Table_Ptr],16
+ fnma.s1 FR_Y_hi = FR_W, FR_half_W, FR_W
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_6
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_W, FR_poly, FR_P_2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fsub.s1 FR_Y_lo = FR_W, FR_Y_hi
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_W, FR_poly_lo, FR_P_5
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_W, FR_poly, FR_P_1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fnma.s1 FR_Y_lo = FR_W, FR_half_W, FR_Y_lo
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_poly_lo, FR_W4, FR_poly
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_poly, FR_W3, FR_Y_lo
+ nop.i 999
+}
+;;
+
+
+LOGL_RETURN:
+// Common code for completion of both logx paths
+
+//
+// L_hi, L_lo already loaded.
+//
+//
+// kernel_log_80 computed ln(X)
+// and returned logX_hi and logX_lo as results.
+// PR_pow_Safe set as well.
+//
+//
+// Compute Y * (logX_hi + logX_lo)
+// P_hi -> X
+// P_lo -> X_cor
+// (Manipulate names so that inputs are in
+// the place kernel_exp expects them)
+//
+// This function computes exp( x + x_cor)
+// Input FR 1: FR_X
+// Input FR 2: FR_X_cor
+// Output FR 3: FR_Y_hi
+// Output FR 4: FR_Y_lo
+// Output FR 5: FR_Scale
+// Output PR 1: PR_Safe
+//
+// P15 is True
+//
+// Load constants used in computing N using right-shift technique
+{ .mlx
+ mov GR_exp_2tom51 = 0xffff-51
+ movl GR_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ add GR_Special_Exp = -50,GR_exp_bias
+ movl GR_rshf_2to51 = 0x4718000000000000 // 1.10000 2^(63+51)
+}
+;;
+
+//
+// Point to Table of W1s
+// Point to Table of W2s
+//
+{ .mmi
+ add GR_W1_ptr = 0x2b0, GR_table_base // Constants_exp_64_W1
+ add GR_W2_ptr = 0x4b0, GR_table_base // Constants_exp_64_W2
+ cmp.le p6,p0= GR_Delta_Exp,GR_Special_Exp
+};;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-12) to right shift int(N) into the significand
+
+{ .mfi
+ setf.sig FR_INV_LN2_2TO63 = GR_sig_inv_ln2 // form 1/ln2 * 2^63
+ nop.f 999
+ and GR_Delta_Exp=GR_Delta_Exp,GR_exp_mask // Get exponent of y-1
+}
+{ .mlx
+ setf.d FR_RSHF_2TO51 = GR_rshf_2to51 // Form const 1.1000 * 2^(63+51)
+ movl GR_rshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_X_lo = FR_Input_Y, FR_logx_lo // logx_lo is Y_lo
+ cmp.eq p15, p0= r0, r0 // Set p15, assume safe
+};;
+
+{ .mmi
+ setf.exp FR_2TOM51 = GR_exp_2tom51 // Form 2^-51 for scaling float_N
+ setf.d FR_RSHF = GR_rshf // Form right shift const 1.1000 * 2^63
+ add GR_Table_Ptr1 = 0x50, GR_table_base // Constants_exp_64_P for
+ // EXPL_SMALL path
+}
+;;
+
+{ .mmi
+ ldfe FR_P_6 = [GR_Table_Ptr1],16 // Load P_6 for EXPL_SMALL path
+;;
+ ldfe FR_P_5 = [GR_Table_Ptr1],16 // Load P_5 for EXPL_SMALL path
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe FR_P_4 = [GR_Table_Ptr1],16 // Load P_4 for EXPL_SMALL path
+ fma.s1 FR_P_hi = FR_Input_Y, FR_logx_hi,FR_X_lo // logx_hi ix Y_hi
+ nop.i 999
+}
+;;
+
+{ .mmi
+ ldfe FR_P_3 = [GR_Table_Ptr1],16 // Load P_3 for EXPL_SMALL path
+;;
+ ldfe FR_P_2 = [GR_Table_Ptr1],16 // Load P_2 for EXPL_SMALL path
+ nop.i 999
+}
+;;
+
+// N = X * Inv_log2_by_2^12
+// By adding 1.10...0*2^63 we shift and get round_int(N_signif) in significand.
+// We actually add 1.10...0*2^51 to X * Inv_log2 to do the same thing.
+{ .mfi
+ ldfe FR_P_1 = [GR_Table_Ptr1] // Load P_1 for EXPL_SMALL path
+ fma.s1 FR_N = FR_X, FR_INV_LN2_2TO63, FR_RSHF_2TO51
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+ fms.s1 FR_P_lo= FR_Input_Y, FR_logx_hi, FR_P_hi // P_hi is X
+(p6) br.cond.spnt POWL_Y_ALMOST_1 // Branch if |y-1| < 2^-50
+}
+;;
+
+{ .mmi
+ getf.exp GR_Expo_X = FR_X
+ add GR_T1_ptr = 0x0b0, GR_table_base // Constants_exp_64_T1
+ add GR_T2_ptr = 0x1b0, GR_table_base // Constants_exp_64_T2
+}
+;;
+
+// float_N = round_int(N)
+// The significand of N contains the rounded integer part of X * 2^12/ln2,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into GR_N_fix.
+
+// Since N is scaled by 2^51, it must be multiplied by 2^-51
+// before the shift constant 1.10000 * 2^63 is subtracted to yield float_N.
+// Thus, float_N contains the floating point version of N
+
+
+{ .mfi
+ add GR_Table_Ptr = 0x20, GR_table_base // Constants_exp_64_A
+ fms.s1 FR_float_N = FR_N, FR_2TOM51, FR_RSHF // Form float_N
+ nop.i 999
+}
+// Create low part of Y(ln(x)_hi + ln(x)_lo) as P_lo
+{ .mfi
+ mov GR_Big_Pos_Exp = 0x3ffe // 16382, largest safe exponent
+ fadd.s1 FR_P_lo = FR_P_lo, FR_X_lo
+ mov GR_Big_Neg_Exp = -0x3ffd // -16381 smallest safe exponent
+};;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_rsq = FR_X, FR_X // rsq = X*X for EXPL_SMALL path
+ mov GR_vsm_expo = -70 // Exponent for very small path
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_P_6, FR_X, FR_P_5 // poly_lo for EXPL_SMALL path
+ add GR_temp = 0x1,r0 // For tiny signif if small path
+}
+;;
+
+//
+// If expo_X < -6 goto exp_small
+//
+{ .mmi
+ getf.sig GR_N_fix = FR_N
+ ldfe FR_A_3 = [GR_Table_Ptr],16 // Load A_3
+ and GR_Expo_X = GR_Expo_X, GR_exp_mask // Get exponent of X
+}
+;;
+
+{ .mfi
+ ldfe FR_A_2 = [GR_Table_Ptr],16 // Load A_2
+ nop.f 999
+ sub GR_Expo_X = GR_Expo_X, GR_exp_bias // Get true exponent of X
+}
+;;
+
+//
+// If -6 > Expo_X, set P9 and branch
+//
+{ .mfb
+ cmp.gt p9, p0 = -6, GR_Expo_X
+ fnma.s1 FR_r = FR_L_hi, FR_float_N, FR_X // r = X - L_hi * float_N
+(p9) br.cond.spnt EXPL_SMALL // Branch if |X| < 2^-6
+}
+;;
+
+//
+// If 14 <= Expo_X, set P10
+//
+{ .mib
+ cmp.le p10, p0 = 14, GR_Expo_X
+ nop.i 999
+(p10) br.cond.spnt EXPL_HUGE // Branch if |X| >= 2^14
+}
+;;
+
+//
+// Load single T1
+// Load single T2
+// W_1_p1 = W_1 + 1
+//
+{ .mmi
+ nop.m 999
+ nop.m 999
+ extr.u GR_M1 = GR_N_fix, 6, 6 // Extract index M_1
+}
+;;
+
+//
+// k = extr.u(N_fix,0,6)
+//
+{ .mmi
+ shladd GR_W1_ptr = GR_M1,3,GR_W1_ptr // Point to W1
+ shladd GR_T1_ptr = GR_M1,2,GR_T1_ptr // Point to T1
+ extr.u GR_M2 = GR_N_fix, 0, 6 // Extract index M_2
+}
+;;
+
+// N_fix is only correct up to 50 bits because of our right shift technique.
+// Actually in the normal path we will have restricted K to about 14 bits.
+// Somewhat arbitrarily we extract 32 bits.
+{ .mmi
+ ldfd FR_W1 = [GR_W1_ptr]
+ shladd GR_W2_ptr = GR_M2,3,GR_W2_ptr // Point to W2
+ extr GR_k = GR_N_fix, 12, 32 // Extract k
+}
+;;
+
+{ .mfi
+ ldfs FR_T1 = [GR_T1_ptr]
+ fnma.s1 FR_r = FR_L_lo, FR_float_N, FR_r
+ shladd GR_T2_ptr = GR_M2,2,GR_T2_ptr // Point to T2
+}
+{ .mfi
+ add GR_exp_bias_p_k = GR_exp_bias, GR_k
+ nop.f 999
+ cmp.gt p14,p15 = GR_k,GR_Big_Pos_Exp
+}
+;;
+
+//
+// if k < big_neg_exp, set p14 and Safe=False
+//
+{ .mmi
+ ldfs FR_T2 = [GR_T2_ptr]
+(p15) cmp.lt p14,p15 = GR_k,GR_Big_Neg_Exp
+ nop.i 999
+}
+;;
+
+{ .mmi
+ setf.exp FR_Scale = GR_exp_bias_p_k
+ ldfd FR_W2 = [GR_W2_ptr]
+ nop.i 999
+}
+;;
+
+{ .mfi
+ ldfe FR_A_1 = [GR_Table_Ptr],16
+ fadd.s1 FR_r = FR_r, FR_X_cor
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fadd.s1 FR_W_1_p1 = FR_W1, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_r, FR_A_3, FR_A_2
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_rsq = FR_r, FR_r
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_T = FR_T1, FR_T2
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_W = FR_W2, FR_W_1_p1, FR_W1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_TMP1 = FR_Scale, FR_Sgn, f0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_r, FR_poly, FR_A_1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_TMP2 = FR_T, f1, f0 // TMP2 = Y_hi = T
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fadd.s1 FR_Wp1 = FR_W, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly = FR_rsq, FR_poly,FR_r
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_Tscale = FR_T, FR_TMP1, f0 // Scale * Sgn * T
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_Wp1, FR_poly, FR_W
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fmpy.s1 FR_TMP3 = FR_Y_lo, FR_Tscale
+ br.cond.sptk POWL_64_SHARED
+}
+;;
+
+
+EXPL_SMALL:
+// Here if |ylogx| < 2^-6
+//
+// Begin creating lsb to perturb final result
+//
+{ .mfi
+ setf.sig FR_temp = GR_temp
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_4
+ cmp.lt p12, p0 = GR_Expo_X, GR_vsm_expo // Test |ylogx| < 2^-70
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_hi = FR_P_2, FR_X, FR_P_1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_TMP2 = f1, f1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_TMP1 = FR_Sgn, f1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_r4 = FR_rsq, FR_rsq
+(p12) cmp.eq p15, p0 = r0, r0 // Set safe if |ylogx| < 2^-70
+}
+{ .mfb
+ nop.m 999
+(p12) fmpy.s1 FR_TMP3 = FR_Sgn, FR_X
+(p12) br.cond.spnt POWL_64_SHARED // Branch if |ylogx| < 2^-70
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_lo = FR_poly_lo, FR_X, FR_P_3
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+ fma.s1 FR_poly_hi = FR_poly_hi, FR_rsq, FR_X
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s1 FR_Y_lo = FR_poly_lo, FR_r4, FR_poly_hi
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_TMP3 = FR_Y_lo, FR_TMP1 // Add sign info
+ nop.i 999
+}
+;;
+
+//
+// Toggle on last bit of Y_lo
+// Set lsb of Y_lo to 1
+//
+{ .mfi
+ nop.m 999
+ for FR_temp = FR_Y_lo,FR_temp
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fmerge.se FR_TMP3 = FR_TMP3,FR_temp
+ br.cond.sptk POWL_64_SHARED
+}
+;;
+
+
+EXPL_HUGE:
+// Here if |ylogx| >= 2^14
+{ .mfi
+ mov GR_temp = 0x0A1DC // If X < 0, exponent -24100
+ fcmp.gt.s1 p12, p13 = FR_X, f0 // Test X > 0
+ cmp.eq p14, p15 = r0, r0 // Set Safe to false
+}
+;;
+
+{ .mmi
+(p12) mov GR_Mask = 0x15DC0 // If X > 0, exponent +24000
+(p13) mov GR_Mask = 0x0A240 // If X < 0, exponent -24000
+ nop.i 999
+}
+;;
+
+{ .mmf
+ setf.exp FR_TMP2 = GR_Mask // Form Y_hi = TMP2
+(p13) setf.exp FR_Y_lo = GR_temp // If X < 0, Y_lo = 2^-24100
+(p12) mov FR_Y_lo = f1 // IF X > 0, Y_lo = 1.0
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fmpy.s1 FR_TMP1 = FR_TMP2, FR_Sgn // TMP1 = Y_hi * Sgn
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fmpy.s1 FR_TMP3 = FR_Y_lo,FR_TMP1 // TMP3 = Y_lo * (Y_hi * Sgn)
+ br.cond.sptk POWL_64_SHARED
+}
+;;
+
+POWL_Y_ALMOST_1:
+// Here if delta = |y-1| < 2^-50
+//
+// x**(1 + delta) = x * e^(ln(x)*delta) ~= x * (1 + ln(x)*delta)
+//
+// Computation will be safe for 2^-16381 <= x < 2^16383
+
+{ .mfi
+ mov GR_exp_ynear1_oflow = 0xffff + 16383
+ fma.s1 FR_TMP1 = FR_Input_X,FR_Delta,f0
+ and GR_exp_x = GR_exp_mask, GR_signexp_x
+}
+;;
+
+{ .mfi
+ cmp.lt p15, p14 = GR_exp_x, GR_exp_ynear1_oflow
+ fma.s1 FR_TMP2 = FR_logx_hi,f1,FR_X_lo
+ mov GR_exp_ynear1_uflow = 0xffff - 16381
+}
+;;
+
+{ .mfb
+(p15) cmp.ge p15, p14 = GR_exp_x, GR_exp_ynear1_uflow
+ fma.s1 FR_TMP3 = FR_Input_X,f1,f0
+ br.cond.sptk POWL_64_SHARED
+};;
+
+POWL_64_SQUARE:
+//
+// Here if x not zero and y=2.
+//
+// Setup for multipath code
+//
+{ .mfi
+ mov GR_exp_square_oflow = 0xffff + 8192 // Exponent where x*x overflows
+ fmerge.se FR_TMP1 = FR_Input_X, FR_Input_X
+ and GR_exp_x = GR_exp_mask, GR_signexp_x // Get exponent of x
+}
+;;
+
+{ .mfi
+ cmp.lt p15, p14 = GR_exp_x, GR_exp_square_oflow // Decide safe/unsafe
+ fmerge.se FR_TMP2 = FR_Input_X, FR_Input_X
+ mov GR_exp_square_uflow = 0xffff - 8191 // Exponent where x*x underflows
+}
+;;
+
+{ .mfi
+(p15) cmp.ge p15, p14 = GR_exp_x, GR_exp_square_uflow // Decide safe/unsafe
+ fma.s1 FR_TMP3 = f0,f0,f0
+ nop.i 999
+}
+;;
+
+//
+// This is the shared path that will set overflow and underflow.
+//
+POWL_64_SHARED:
+
+//
+// Return if no danger of over or underflow.
+//
+{ .mfb
+ nop.m 999
+ fma.s0 FR_Result = FR_TMP1, FR_TMP2, FR_TMP3
+(p15) br.ret.sptk b0 // Main path return if certain no over/underflow
+}
+;;
+
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S2 user supplied status + FZ + TD (Underflows)
+//
+//
+// If (Safe) is true, then
+// Compute result using user supplied status field.
+// No overflow or underflow here, but perhaps inexact.
+// Return
+// Else
+// Determine if overflow or underflow was raised.
+// Fetch +/- overflow threshold for IEEE double extended
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x41 // For underflow test, set S2=User+TD+FTZ
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s2 FR_Result_small = FR_TMP1, FR_TMP2, FR_TMP3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x42 // For overflow test, set S2=User+TD+WRE
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fma.s2 FR_Result_big = FR_TMP1, FR_TMP2,FR_TMP3
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fsetc.s2 0x7F,0x40 // Reset S2=User
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p11, p0 = FR_Result_small, 0x00F // Test small result unorm/zero
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.ge.s1 p8, p0 = FR_Result_big , FR_Big // Test >= + oflow threshold
+ nop.i 999
+}
+;;
+
+{ .mfb
+(p11) mov GR_Parameter_TAG = 19 // Set tag for underflow
+ fcmp.le.s1 p9, p0 = FR_Result_big, FR_NBig // Test <= - oflow threshold
+(p11) br.cond.spnt __libm_error_region // Branch if pow underflowed
+}
+;;
+
+{ .mfb
+(p8) mov GR_Parameter_TAG = 18 // Set tag for overflow
+ nop.f 999
+(p8) br.cond.spnt __libm_error_region // Branch if pow +overflow
+}
+;;
+
+{ .mbb
+(p9) mov GR_Parameter_TAG = 18 // Set tag for overflow
+(p9) br.cond.spnt __libm_error_region // Branch if pow -overflow
+ br.ret.sptk b0 // Branch if result really ok
+}
+;;
+
+
+POWL_64_SPECIAL:
+// Here if x or y is NatVal, nan, inf, or zero
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p15, p0 = FR_Input_X, f1 // Test x=+1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p8, p0 = FR_Input_X, 0x143 // Test x natval, snan
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p15) fcmp.eq.unc.s0 p6,p0 = FR_Input_Y, f0 // If x=1, flag invalid if y=SNaN
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p15) fmpy.s0 FR_Result = f1,f1 // If x=1, result=1
+(p15) br.ret.spnt b0 // Exit if x=1
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p6, p0 = FR_Input_Y, 0x007 // Test y zero
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p9, p0 = FR_Input_Y, 0x143 // Test y natval, snan
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p10, p0 = FR_Input_X, 0x083 // Test x qnan
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p8) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If x=snan, result=qnan
+(p6) cmp.ne p8,p0 = r0,r0 // Don't exit if x=snan, y=0 ==> result=+1
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p15, p0 = FR_Input_X,0x007 // Test x=0, y=0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fmpy.s0 FR_Result = FR_Input_Y, FR_Input_X // If y=snan, result=qnan
+(p8) br.ret.spnt b0 // Exit if x=snan, y not 0,
+ // result=qnan
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p7, p0 = FR_Input_Y, f1 // Test y +1.0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_X, f0 // If x=qnan, result=qnan
+(p9) br.ret.spnt b0 // Exit if y=snan, result=qnan
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fclass.m.unc p8, p0 = FR_Input_X,0x0C3 // Test x=nan, y=0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p6) fcmp.eq.s0 p9,p0 = FR_Input_X, f0 // If y=0, flag if x denormal
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fadd.s0 FR_Result = f1, f0 // If y=0, result=1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p11, p0 = FR_Input_Y, 0x083 // Test y qnan
+ nop.i 999
+}
+{ .mfb
+(p15) mov GR_Parameter_TAG = 20 // Error tag for x=0, y=0
+(p7) fmpy.s0 FR_Result = FR_Input_X,f1 // If y=1, result=x
+(p15) br.cond.spnt __libm_error_region // Branch if x=0, y=0, result=1
+}
+;;
+
+{ .mfb
+(p8) mov GR_Parameter_TAG = 23 // Error tag for x=nan, y=0
+ fclass.m p14, p0 = FR_Input_Y, 0x023 // Test y inf
+(p8) br.cond.spnt __libm_error_region // Branch if x=snan, y=0,
+ // result=1
+}
+;;
+
+{ .mfb
+ nop.m 999
+ fclass.m p13, p0 = FR_Input_X, 0x023 // Test x inf
+(p6) br.ret.spnt b0 // Exit y=0, x not nan or 0,
+ // result=1
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p14) fcmp.eq.unc.s1 p0,p14 = FR_Input_X,f0 // Test x not 0, y=inf
+(p7) br.ret.spnt b0 // Exit y=1, x not snan,
+ // result=x
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p10) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X // If x=qnan, y not snan,
+ // result=qnan
+(p10) br.ret.spnt b0 // Exit x=qnan, y not snan,
+ // result=qnan
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p11) fmpy.s0 FR_Result = FR_Input_Y,FR_Input_X // If y=qnan, x not nan or 1,
+ // result=qnan
+(p11) br.ret.spnt b0 // Exit y=qnan, x not nan or 1,
+ // result=qnan
+}
+;;
+
+{ .mbb
+ nop.m 999
+(p14) br.cond.spnt POWL_64_Y_IS_INF // Branch if y=inf, x not 1 or nan
+(p13) br.cond.spnt POWL_64_X_IS_INF // Branch if x=inf, y not 1 or nan
+}
+;;
+
+
+POWL_64_X_IS_ZERO:
+// Here if x=0, y not nan or 1 or inf or 0
+
+// There is logic starting here to determine if y is an integer when x = 0.
+// If 0 < |y| < 1 then clearly y is not an integer.
+// If |y| > 1, then the significand of y is shifted left by the size of
+// the exponent of y. This preserves the lsb of the integer part + the
+// fractional bits. The lsb of the integer can be tested to determine if
+// the integer is even or odd. The fractional bits can be tested. If zero,
+// then y is an integer.
+//
+{ .mfi
+ and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent of y
+ nop.f 999
+ and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
+}
+;;
+
+//
+// Maybe y is < 1 already, so
+// can never be an integer.
+//
+{ .mfi
+ cmp.lt p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1
+ nop.f 999
+ sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent of y
+}
+;;
+
+//
+// Shift significand of y looking for nonzero bits
+// For y > 1, shift signif_y exp_y bits to the left
+// For y < 1, turn on 4 low order bits of significand of y
+// so that the fraction will always be non-zero
+//
+{ .mmi
+(p9) or GR_exp_y= 0xF,GR_signif_y // Force nonzero fraction if y<1
+;;
+ nop.m 999
+(p8) shl GR_exp_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
+ // Wait 4 cycles to use result
+}
+;;
+
+{ .mmi
+ nop.m 999
+;;
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mmi
+ nop.m 999
+;;
+ nop.m 999
+ shl GR_fraction_y= GR_exp_y,1 // Shift left 1 to get fraction
+}
+;;
+
+//
+// Integer part of y shifted off.
+// Get y's low even or odd bit - y might not be an int.
+//
+{ .mii
+ cmp.eq p13,p0 = GR_fraction_y, r0 // Test for y integer
+ cmp.eq p8,p0 = GR_y_sign, r0 // Test for y > 0
+;;
+(p13) tbit.nz.unc p13,p0 = GR_exp_y, 63 // Test if y an odd integer
+}
+;;
+
+{ .mfi
+(p13) cmp.eq.unc p13,p14 = GR_y_sign, r0 // Test y pos odd integer
+(p8) fcmp.eq.s0 p12,p0 = FR_Input_Y, f0 // If x=0 and y>0 flag if y denormal
+ nop.i 999
+}
+;;
+
+//
+// Return +/-0 when x=+/-0 and y is positive odd integer
+//
+{ .mfb
+ nop.m 999
+(p13) mov FR_Result = FR_Input_X // If x=0, y pos odd int, result=x
+(p13) br.ret.spnt b0 // Exit x=0, y pos odd int, result=x
+}
+;;
+
+//
+// Return +/-inf when x=+/-0 and y is negative odd int
+//
+{ .mfb
+(p14) mov GR_Parameter_TAG = 21
+(p14) frcpa.s0 FR_Result, p0 = f1, FR_Input_X // Result +-inf, set Z flag
+(p14) br.cond.spnt __libm_error_region
+}
+;;
+
+//
+// Return +0 when x=+/-0 and y positive and not an odd integer
+//
+{ .mfb
+ nop.m 999
+(p8) mov FR_Result = f0 // If x=0, y>0 and not odd integer, result=+0
+(p8) br.ret.sptk b0 // Exit x=0, y>0 and not odd integer, result=+0
+}
+;;
+
+//
+// Return +inf when x=+/-0 and y is negative and not odd int
+//
+{ .mfb
+ mov GR_Parameter_TAG = 21
+ frcpa.s0 FR_Result, p10 = f1,f0 // Result +inf, raise Z flag
+ br.cond.sptk __libm_error_region
+}
+;;
+
+
+POWL_64_X_IS_INF:
+//
+// Here if x=inf, y not 1 or nan
+//
+{ .mfi
+ and GR_exp_y = GR_exp_mask,GR_signexp_y // Get biased exponent y
+ fclass.m p13, p0 = FR_Input_X,0x022 // Test x=-inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ and GR_y_sign = GR_sign_mask,GR_signexp_y // Get sign of y
+ fcmp.eq.s0 p9,p0 = FR_Input_Y, f0 // Dummy to set flag if y denorm
+ nop.i 999
+}
+;;
+
+//
+// Maybe y is < 1 already, so
+// isn't an int.
+//
+{ .mfi
+(p13) cmp.lt.unc p9, p8 = GR_exp_y,GR_exp_bias // Test 0 < |y| < 1 if x=-inf
+ fclass.m p11, p0 = FR_Input_X,0x021 // Test x=+inf
+ sub GR_exp_y = GR_exp_y,GR_exp_bias // Get true exponent y
+}
+;;
+
+//
+// Shift significand of y looking for nonzero bits
+// For y > 1, shift signif_y exp_y bits to the left
+// For y < 1, turn on 4 low order bits of significand of y
+// so that the fraction will always be non-zero
+//
+{ .mmi
+(p9) or GR_exp_y= 0xF,GR_signif_y // Force nonzero fraction if y<1
+;;
+(p11) cmp.eq.unc p14,p12 = GR_y_sign, r0 // Test x=+inf, y>0
+(p8) shl GR_exp_y= GR_signif_y,GR_exp_y // Get lsb of int + fraction
+ // Wait 4 cycles to use result
+}
+;;
+
+//
+// Return +inf for x=+inf, y > 0
+// Return +0 for x=+inf, y < 0
+//
+{ .mfi
+ nop.m 999
+(p12) mov FR_Result = f0 // If x=+inf, y<0, result=+0
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p14) fma.s0 FR_Result = FR_Input_X,f1,f0 // If x=+inf, y>0, result=+inf
+(p11) br.ret.sptk b0 // Exit x=+inf
+}
+;;
+
+//
+// Here only if x=-inf. Wait until can use result of shl...
+//
+{ .mmi
+ nop.m 999
+;;
+ nop.m 999
+ nop.i 999
+}
+;;
+
+{ .mfi
+ cmp.eq p8,p9 = GR_y_sign, r0 // Test y pos
+ nop.f 999
+ shl GR_fraction_y = GR_exp_y,1 // Shift left 1 to get fraction
+}
+;;
+
+{ .mmi
+ cmp.eq p13,p0 = GR_fraction_y, r0 // Test y integer
+;;
+ nop.m 999
+(p13) tbit.nz.unc p13,p0 = GR_exp_y, 63 // Test y odd integer
+}
+;;
+
+//
+// Is y even or odd?
+//
+{ .mii
+(p13) cmp.eq.unc p14,p10 = GR_y_sign, r0 // Test x=-inf, y pos odd int
+(p13) cmp.ne.and p8,p9 = r0,r0 // If y odd int, turn off p8,p9
+ nop.i 999
+}
+;;
+
+//
+// Return -0 for x = -inf and y < 0 and odd int.
+// Return -Inf for x = -inf and y > 0 and odd int.
+//
+{ .mfi
+ nop.m 999
+(p10) fmerge.ns FR_Result = f0, f0 // If x=-inf, y neg odd int, result=-0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p14) fmpy.s0 FR_Result = FR_Input_X,f1 // If x=-inf, y pos odd int, result=-inf
+ nop.i 999
+}
+;;
+
+//
+// Return Inf for x = -inf and y > 0 not an odd int.
+// Return +0 for x = -inf and y < 0 not an odd int.
+//
+.pred.rel "mutex",p8,p9
+{ .mfi
+ nop.m 999
+(p8) fmerge.ns FR_Result = FR_Input_X, FR_Input_X // If x=-inf, y>0 not odd int
+ // result=+inf
+ nop.i 999
+}
+{ .mfb
+ nop.m 999
+(p9) fmpy.s0 FR_Result = f0,f0 // If x=-inf, y<0 not odd int
+ // result=+0
+ br.ret.sptk b0 // Exit for x=-inf
+}
+;;
+
+
+POWL_64_Y_IS_INF:
+// Here if y=inf, x not 1 or nan
+//
+// For y = +Inf and |x| < 1 returns 0
+// For y = +Inf and |x| > 1 returns Inf
+// For y = -Inf and |x| < 1 returns Inf
+// For y = -Inf and |x| > 1 returns 0
+// For y = Inf and |x| = 1 returns 1
+//
+{ .mfi
+ nop.m 999
+ fclass.m p8, p0 = FR_Input_Y, 0x021 // Test y=+inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fclass.m p9, p0 = FR_Input_Y, 0x022 // Test y=-inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fabs FR_X = FR_Input_X // Form |x|
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s0 p10,p0 = FR_Input_X, f0 // flag if x denormal
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.lt.unc.s1 p6, p0 = FR_X, f1 // Test y=+inf, |x|<1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p8) fcmp.gt.unc.s1 p7, p0 = FR_X, f1 // Test y=+inf, |x|>1
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fcmp.lt.unc.s1 p12, p0 = FR_X, f1 // Test y=-inf, |x|<1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p6) fmpy.s0 FR_Result = f0,f0 // If y=+inf, |x|<1, result=+0
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p9) fcmp.gt.unc.s1 p13, p0 = FR_X, f1 // Test y=-inf, |x|>1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p7) fmpy.s0 FR_Result = FR_Input_Y, f1 // If y=+inf, |x|>1, result=+inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+ fcmp.eq.s1 p14, p0 = FR_X, f1 // Test y=inf, |x|=1
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p12) fnma.s0 FR_Result = FR_Input_Y, f1, f0 // If y=-inf, |x|<1, result=+inf
+ nop.i 999
+}
+;;
+
+{ .mfi
+ nop.m 999
+(p13) mov FR_Result = f0 // If y=-inf, |x|>1, result=+0
+ nop.i 999
+}
+;;
+
+{ .mfb
+ nop.m 999
+(p14) fmpy.s0 FR_Result = f1,f1 // If y=inf, |x|=1, result=+1
+ br.ret.sptk b0 // Common return for y=inf
+}
+;;
+
+
+// Here if x or y denorm/unorm
+POWL_DENORM:
+{ .mmi
+ getf.sig GR_signif_Z = FR_norm_X // Get significand of x
+;;
+ getf.exp GR_signexp_y = FR_norm_Y // Get sign and exp of y
+ nop.i 999
+}
+;;
+
+{ .mfi
+ getf.sig GR_signif_y = FR_norm_Y // Get significand of y
+ nop.f 999
+ nop.i 999
+}
+;;
+
+{ .mib
+ getf.exp GR_signexp_x = FR_norm_X // Get sign and exp of x
+ extr.u GR_Index1 = GR_signif_Z, 59, 4 // Extract upper 4 signif bits of x
+ br.cond.sptk POWL_COMMON // Branch back to main path
+}
+;;
+
+
+POWL_64_UNSUPPORT:
+//
+// Raise exceptions for specific
+// values - pseudo NaN and
+// infinities.
+// Return NaN and raise invalid
+//
+{ .mfb
+ nop.m 999
+ fmpy.s0 FR_Result = FR_Input_X,f0
+ br.ret.sptk b0
+}
+;;
+
+POWL_64_XNEG:
+//
+// Raise invalid for x < 0 and
+// y not an integer
+//
+{ .mfi
+ nop.m 999
+ frcpa.s0 FR_Result, p8 = f0, f0
+ mov GR_Parameter_TAG = 22
+}
+{ .mib
+ nop.m 999
+ nop.i 999
+ br.cond.sptk __libm_error_region
+}
+;;
+
+POWL_64_SQRT:
+{ .mfi
+ nop.m 999
+ frsqrta.s0 FR_Result,p10 = FR_save_Input_X
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f62=FR_Half,FR_save_Input_X,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f63=FR_Result,FR_Result,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 f32=f63,f62,FR_Half
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f33=f32,FR_Result,FR_Result
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f34=f33,f62,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 f35=f34,f33,FR_Half
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f63=f35,f33,f33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f32=FR_save_Input_X,f63,f0
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 FR_Result=f63,f62,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f33=f11,f63,f0
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 f34=f32,f32,FR_save_Input_X
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 f35=FR_Result,f63,FR_Half
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f62=f33,f34,f32
+ nop.i 999
+}
+{ .mfi
+ nop.m 999
+(p10) fma.s1 f63=f33,f35,f33
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fnma.s1 f32=f62,f62,FR_save_Input_X
+ nop.i 999 ;;
+}
+{ .mfb
+ nop.m 999
+(p10) fma.s0 FR_Result=f32,f63,f62
+ br.ret.sptk b0 // Exit for x > 0, y = 0.5
+}
+;;
+
+GLOBAL_IEEE754_END(powl)
+libm_alias_ldouble_other (pow, pow)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // Error stub: spill y, x, result to stack, call __libm_error_support
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Address of Parameter 2 (y) slot = old sp - 32
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Allocate 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Input_Y,16 // Store Parameter 2 (y); pointer advances 16 to result slot
+ add GR_Parameter_X = 16,sp // Parameter 1 address = new sp + 16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_save_Input_X // Store Parameter 1 (x) on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 (result) address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_Result // Store Parameter 3 (result) on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at Parameter 2 (y) slot
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute address of result slot (sp + 48)
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Reload result from its stack slot into f8
+.restore sp
+ add sp = 64,sp // Release the 64-byte frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region#)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_rem_pio2l.c b/sysdeps/ia64/fpu/e_rem_pio2l.c
new file mode 100644
index 0000000000..41254ae60a
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_rem_pio2l.c
@@ -0,0 +1 @@
+/* Not needed. */
diff --git a/sysdeps/ia64/fpu/e_remainder.S b/sysdeps/ia64/fpu/e_remainder.S
new file mode 100644
index 0000000000..af4b5fe36f
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_remainder.S
@@ -0,0 +1,589 @@
+.file "remainder.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//====================================================================
+// 02/02/00 Initial version
+// 03/02/00 New Algorithm
+// 04/04/00 Unwind support added
+// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/29/00 Set FR_Y to f9
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//====================================================================
+// double remainder(double,double);
+//
+// Overview of operation
+//====================================================================
+// remainder(a,b)=a-i*b,
+// where i is an integer such that, if b!=0 and a is finite,
+// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
+//
+// Algorithm
+//====================================================================
+// a). eliminate special cases
+// b). if |a/b|<0.25 (first quotient estimate), return a
+// c). use single precision divide algorithm to get quotient q
+// rounded to 24 bits of precision
+// d). calculate partial remainders (using both q and q-ulp);
+// select one and RZ(a/b) based on the sign of |a|-|b|*q
+// e). if the exponent difference (exponent(a)-exponent(b))
+// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
+// and sticky bits to round to integer; exit loop and
+// calculate final remainder
+// f). if exponent(a)-exponent(b)>=24, select new value of a as
+// the partial remainder calculated using RZ(a/b);
+// repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+
+// Registers used
+//====================================================================
+// Predicate registers: p6-p14
+// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15,f32
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(remainder)
+
+// inputs: a in f8, b in f9 (double remainder(double,double))
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f13=|a|
+ fmerge.s f13=f0,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // f14=|b|
+ fmerge.s f14=f0,f9
+ nop.i 0;;
+}
+ {.mlx
+ mov r28=0x2ffdd // sign/exponent pattern for E (goes to f32, used in Step (4))
+ // r3=2^{23} (single-precision bit pattern; becomes f12 inside the loop)
+ movl r3=0x4b000000;;
+}
+
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ setf.exp f32=r28
+ fclass.m.unc p11,p0 = f9, 0xe7
+ nop.i 999
+}
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf? p9
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999;;
+}
+
+{.mfi
+ nop.m 0
+ mov f12=f0
+ nop.i 0
+}
+{ .mfi
+ // set p7=1 (enables the first-pass |a/b|<1/4 early-exit test in the loop)
+ cmp.eq.unc p7,p0=r0,r0
+ // Step (1)
+ // y0 = 1 / |b| (reciprocal approximation) in f10
+ frcpa.s1 f10,p6=f13,f14
+ nop.i 0;;
+}
+
+{.bbb
+ (p9) br.cond.spnt FREM_X_NAN_INF
+ (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
+ nop.b 0
+} {.mfi
+ nop.m 0
+ // set D flag if a (f8) is denormal
+ fnma.s0 f6=f8,f1,f8
+ nop.i 0;;
+}
+
+
+remloop24: // one pass: 24-bit quotient estimate of |a|/|b| and partial remainder; repeats while q>=2^{23}
+ { .mfi
+ nop.m 0
+ // Step (2)
+ // q0 = a * y0 in f12
+ (p6) fma.s1 f12=f13,f10,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // e0 = 1 - b * y0 in f7
+ (p6) fnma.s1 f7=f14,f10,f1
+ nop.i 0;;
+} {.mlx
+ nop.m 0
+ // r2=1.25*2^{-24} (single-precision bit pattern)
+ movl r2=0x33a00000;;
+}
+
+{.mfi
+ nop.m 0
+ // q1=q0*(1+e0)
+ (p6) fma.s1 f15=f12,f7,f12
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (4)
+ // e1 = e0 * e0 + E in f7
+ (p6) fma.s1 f7=f7,f7,f32
+ nop.i 0;;
+}
+ {.mii
+ (p7) getf.exp r29=f12
+ (p7) mov r28=0xfffd
+ nop.i 0;;
+}
+ { .mfi
+ // f12=2^{23}
+ setf.s f12=r3
+ // Step (5)
+ // q2 = q1 + e1 * q1 in f11 (rounded to single precision by the .s completer)
+ (p6) fma.s.s1 f11=f7,f15,f15
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // same q1 + e1 * q1 in f6, but without the single-precision rounding
+ (p6) fma.s1 f6=f7,f15,f15
+ nop.i 0;;
+}
+
+ {.mmi
+ // f15=1.25*2^{-24}
+ setf.s f15=r2
+ // q<1/4 ? (i.e. expon< -2)
+ (p7) cmp.gt p7,p0=r28,r29
+ nop.i 0;;
+}
+
+{.mfb
+ // r29= -32+bias
+ mov r29=0xffdf
+ // if |a/b|<1/4, set D flag before returning
+ (p7) fma.d.s0 f9=f9,f0,f8
+ nop.b 0;;
+}
+ {.mfb
+ nop.m 0
+ // can be combined with bundle above if sign of 0 or
+ // FTZ enabled are not important
+ (p7) fmerge.s f8=f8,f9
+ // return if 4*|a|<|b| (estimated quotient < 1/4)
+ (p7) br.ret.spnt b0;;
+}
+ {.mfi
+ // f7=2^{-32}
+ setf.exp f7=r29
+ // set f8 to current a value | sign
+ fmerge.s f8=f8,f13
+ nop.i 0;;
+}
+
+
+ {.mfi
+ getf.exp r28=f6
+ // last step ? (q<2^{23}); p12 is set when another pass is needed
+ fcmp.lt.unc.s1 p0,p12=f6,f12
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r=a-b*q
+ fnma.s1 f6=f14,f11,f13
+ nop.i 0
+} {.mfi
+ // r2=23+bias
+ mov r2=0xffff+23
+ // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
+ fnma.s.s1 f15=f11,f15,f11
+ nop.i 0;;
+}
+ {.mmi
+ nop.m 0
+ cmp.eq p11,p14=r2,r28
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p11,p14
+ {.mfi
+ nop.m 0
+ // if the exponent of q is 23 (q in [2^{23},2^{24})), then r=a-b*2^{23}
+ (p11) fnma.s1 f13=f12,f14,f13
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r2=a-b*q' (r2 here names the second partial remainder, kept in f13, not GR r2)
+ (p14) fnma.s1 f13=f14,f15,f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r>0 iff q=RZ(a/b) and inexact
+ fcmp.gt.unc.s1 p8,p0=f6,f0
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // r<0 iff q'=RZ(a/b) and inexact
+ (p14) fcmp.lt.unc.s1 p9,p10=f6,f0
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p9
+ {.mfi
+ nop.m 0
+ // (p8) Q=q+(last iteration ? sticky bits:0)
+ // i.e. Q=q+q*x (x=2^{-32} or 0)
+ (p8) fma.s1 f11=f11,f7,f11
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // (p9) Q=q'+(last iteration ? sticky bits:0)
+ // i.e. Q=q'+q'*x (x=2^{-32} or 0)
+ (p9) fma.s1 f11=f15,f7,f15
+ nop.i 0;;
+}
+
+ {.mfb
+ nop.m 0
+ // (p9) set r=r2 (new a, if not last iteration)
+ // (p10) new a =r
+ (p10) mov f13=f6
+ (p12) br.cond.sptk remloop24;;
+}
+
+// last iteration
+ {.mfi
+ nop.m 0
+ // set f9=|b|*sgn(a)
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // round Q to integer
+ fcvt.fx.s1 f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // save sign of a
+ fmerge.s f7=f8,f8
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // convert the integer back to floating point (normalize)
+ fcvt.xf f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // This can be removed if sign of 0 is not important
+ // get remainder using sf1
+ fnma.d.s1 f12=f9,f11,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // get remainder
+ fnma.d.s0 f8=f9,f11,f8
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // f12=0?
+ // This can be removed if sign of 0 is not important
+ fcmp.eq.unc.s1 p8,p0=f12,f0
+ nop.i 0;;
+}
+ {.mfb
+ nop.m 0
+ // if f8=0, set sign correctly
+ // This can be removed if sign of 0 is not important
+ (p8) fmerge.s f8=f7,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+FREM_X_NAN_INF: // reached from the entry code when a (f8) is NaN or infinity
+
+// Y zero ? (copy b through sf1, then compare with 0)
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt FREM_Y_ZERO;;
+}
+
+// X infinity? p8 (narrowed below to: X infinite and Y not NaN)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p8,p0 = f8, 0x23
+ nop.i 999
+}
+// X infinity? p11 (selects the error-return exit at the end of this section)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ? (p8 stays set only when Y is not a NaN)
+{.mfi
+ nop.m 999
+(p8) fclass.m.unc p0,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p7 = f8,f8 // inf/inf: presumably produces the QNaN indefinite and raises Invalid - TODO confirm
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+(p11) mov f10=f8 // FR_X (f10) = current f8, stored as Parameter 1 on the error path
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) fma.d.s0 f8=f8,f1,f0
+ nop.i 0 ;;
+}
+
+{ .mfb
+ nop.m 999
+ frcpa.s0 f8,p7=f8,f9 // result candidate f8/f9; at least one operand is a NaN on every path reaching here
+ (p11) br.cond.spnt EXP_ERROR_RETURN;;
+}
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.ret.spnt b0 ;;
+}
+
+
+FREM_Y_NAN_INF_ZERO: // reached from the entry code when b (f9) is NaN, infinity or zero
+
+// Y INF? p7: return a (as a*1+0, rounded to double)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.d.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN? p9: return b (as b*1+0); otherwise fall through to the code that follows
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.d.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+FREM_Y_ZERO:
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Build the NaN result in f8, then fall through to EXP_ERROR_RETURN
+
+// X NAN? p9 = yes, p10 = no
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff // p9 also if X is in none of the listed classes (presumably NaTVal/unsupported)
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa.s0 f11,p7=f8,f0 // X/0 with X NaN-like: presumably propagates X - TODO confirm
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa.s0 f11,p7 = f0,f0 // 0/0: QNAN indefinite
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8, f8 // FR_X (f10) = original a, for the error handler
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.d.s0 f8=f11,f1,f0 // FR_RESULT (f8) = f11 rounded to double
+ nop.i 999
+}
+
+
+EXP_ERROR_RETURN: // common exit for the invalid-operand cases of remainder()
+
+{ .mib
+ mov GR_Parameter_TAG = 124 // error tag handed to __libm_error_support in r40
+ nop.i 999
+ br.sptk __libm_error_region;; // plain branch, not a call: the stub returns to our caller through b0
+}
+
+GLOBAL_IEEE754_END(remainder)
+libm_alias_double_other (__remainder, remainder)
+weak_alias (__remainder, drem)
+
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // error stub: spill FR_X, FR_Y and FR_RESULT as doubles into a 64-byte frame, call __libm_error_support, reload f8
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (equals new sp+32 once the frame below is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer post-increments to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-set the result address, which was overwritten (see History 08/15/00)
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainderf.S b/sysdeps/ia64/fpu/e_remainderf.S
new file mode 100644
index 0000000000..6d2f77aa59
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_remainderf.S
@@ -0,0 +1,607 @@
+.file "remainderf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//====================================================================
+// 02/02/00 Initial version
+// 03/02/00 New algorithm
+// 04/04/00 Unwind support added
+// 07/21/00 Fixed quotient=2^{24*m+23} bug
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/29/00 Set FR_Y to f9
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//====================================================================
+// float remainderf(float,float);
+//
+// Overview of operation
+//====================================================================
+// remainder(a,b)=a-i*b,
+// where i is an integer such that, if b!=0 and a is finite,
+// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
+//
+// Algorithm
+//====================================================================
+// a). eliminate special cases
+// b). if |a/b|<0.25 (first quotient estimate), return a
+// c). use single precision divide algorithm to get quotient q
+// rounded to 24 bits of precision
+// d). calculate partial remainders (using both q and q-ulp);
+// select one and RZ(a/b) based on the sign of |a|-|b|*q
+// e). if the exponent difference (exponent(a)-exponent(b))
+// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
+// and sticky bits to round to integer; exit loop and
+// calculate final remainder
+// f). if exponent(a)-exponent(b)>=24, select new value of a as
+// the partial remainder calculated using RZ(a/b);
+// repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p12
+// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r40
+// Floating point registers: f6-f15
+//
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(remainderf)
+
+// inputs: a in f8, b in f9 (float remainderf(float,float))
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f13=|a|
+ fmerge.s f13=f0,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // f14=|b|
+ fmerge.s f14=f0,f9
+ nop.i 0;;
+}
+ {.mlx
+ nop.m 0
+ // r3=2^{24}-2 (single-precision bit pattern; becomes f12 inside the loop)
+ movl r3=0x4b7ffffe;;
+}
+
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f9, 0xe7
+ nop.i 999
+}
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf? p9
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f8, 0xe3
+ nop.i 999;;
+}
+
+{.mfi
+ nop.m 0
+ mov f15=f0
+ nop.i 0
+}
+{ .mfi
+ // set p7=1 (enables the first-pass |a/b|<1/4 early-exit test in the loop)
+ cmp.eq.unc p7,p0=r0,r0
+ // Step (1)
+ // y0 = 1 / |b| (reciprocal approximation) in f10
+ frcpa.s1 f10,p6=f13,f14
+ nop.i 0;;
+}
+{.bbb
+ (p9) br.cond.spnt FREM_X_NAN_INF
+ (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
+ nop.b 0
+} {.mfi
+ nop.m 0
+ // set D flag if a (f8) is denormal
+ fnma.s0 f6=f8,f1,f8
+ nop.i 0;;
+}
+
+.align 32
+remloop24: // one pass: 24-bit quotient estimate of |a|/|b| and partial remainder; repeats while q3>=2^{24}-2
+ { .mfi
+ // f12=2^{24}-2
+ setf.s f12=r3
+ // Step (2)
+ // q0 = a * y0 in f15
+ (p6) fma.s1 f15=f13,f10,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (3)
+ // e0 = 1 - b * y0 in f7
+ (p6) fnma.s1 f7=f14,f10,f1
+ nop.i 0;;
+}
+{.mlx
+ nop.m 0
+ // r2=1.25*2^{-24} (single-precision bit pattern)
+ movl r2=0x33a00000;;
+}
+ { .mfi
+ nop.m 0
+ // Step (4)
+ // q1 = q0 + e0 * q0 in f6
+ (p6) fma.s1 f6=f7,f15,f15
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (5)
+ // e1 = e0 * e0 in f7
+ (p6) fma.s1 f7=f7,f7,f0
+ nop.i 0;;
+}
+ {.mii
+ (p7) getf.exp r29=f15
+ (p7) mov r28=0xfffd
+ nop.i 0;;
+}
+
+ { .mfi
+ // f15=1.25*2^{-24}
+ setf.s f15=r2
+ // Step (6)
+ // q2 = q1 + e1 * q1 in f6
+ (p6) fma.s1 f6=f7,f6,f6
+ nop.i 0
+}
+{ .mfi
+ mov r2=0x3e7
+ // Step (7)
+ // e2 = e1 * e1 in f7
+ (p6) fma.s1 f7=f7,f7,f0
+ nop.i 0;;
+}
+
+ {.mmi
+ // q<1/4 ? (i.e. expon< -2)
+ (p7) cmp.gt.unc p7,p0=r28,r29
+ nop.m 0
+ // r2=0x3e7000000
+ shl r2=r2,24;;
+}
+
+{.mfb
+ // r2=0x3e7000001
+ add r2=1,r2
+ // if |a/b|<1/4, set D flag before returning
+ (p7) fma.s.s0 f9=f9,f0,f8
+ nop.b 0;;
+}
+ {.mfb
+ nop.m 0
+ // can be combined with bundle above if sign of 0 or
+ // FTZ enabled are not important
+ (p7) fmerge.s f8=f8,f9
+ // return if 4*|a|<|b| (estimated quotient < 1/4)
+ (p7) br.ret.spnt b0;;
+}
+ {.mfi
+ nop.m 0
+ // set f8 to current a value | sign
+ fmerge.s f8=f8,f13
+ // r2=2^{-24}+2^{-48} (double prec.)
+ shl r2=r2,28;;
+}
+
+
+{ .mfi
+ // r29= -32+bias
+ mov r29=0xffdf
+ // Step (8)
+ // q3 = q2 + e2 * q2 in f6
+ (p6) fma.d.s1 f6=f7,f6,f6
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+ // Step (9)
+ // q = q3 (rounded to single precision) in f11
+ (p6) fma.s.s1 f11=f6,f1,f0
+ nop.i 0;;
+}
+ {.mfi
+ // f7=2^{-24}+2^{-48}
+ setf.d f7=r2
+ // last step ? (q3<2^{24}-2 --> q<2^{24}); p12 is set when another pass is needed
+ fcmp.lt.unc.s1 p0,p12=f6,f12
+ nop.i 0
+} {.mfi
+ // f12=2^{-32}
+ setf.exp f12=r29
+ nop.f 0
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r=a-b*q
+ fnma.s1 f6=f14,f11,f13
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
+ fnma.s.s1 f15=f11,f15,f11
+ nop.i 0;;
+}
+
+ {.mfi
+ nop.m 0
+ // r2=a-b*q' (r2 here names the second partial remainder, kept in f13, not GR r2)
+ fnma.s1 f13=f14,f15,f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r>0 iff q=RZ(a/b) and inexact
+ fcmp.gt.unc.s1 p8,p0=f6,f0
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r<0 iff q'=RZ(a/b) and inexact
+ fcmp.lt.unc.s1 p9,p10=f6,f0
+ nop.i 0;;
+}
+.pred.rel "mutex",p8,p9
+ {.mfi
+ nop.m 0
+ // (p8) Q=q+(last iteration ? sticky bits:0)
+ // i.e. Q=q+q*x (x=2^{-32} or 0)
+ (p8) fma.s1 f11=f11,f12,f11
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // (p9) Q=q'+(last iteration ? sticky bits:0)
+ // i.e. Q=q'+q'*x (x=2^{-24} or 0: if expon. difference=23, want to round back to q)
+ (p9) fma.s1 f11=f15,f7,f15
+ nop.i 0;;
+}
+
+ {.mfb
+ nop.m 0
+ // (p9) set r=r2 (new a, if not last iteration)
+ // (p10) new a =r
+ (p10) mov f13=f6
+ (p12) br.cond.sptk remloop24;;
+}
+
+// last iteration
+ {.mfi
+ nop.m 0
+ // set f9=|b|*sgn(a)
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // round Q to integer
+ fcvt.fx.s1 f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // save sign of a
+ fmerge.s f7=f8,f8
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // convert the integer back to floating point (normalize)
+ fcvt.xf f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // This can be removed if sign of 0 is not important
+ // get remainder using sf1
+ fnma.s.s1 f12=f9,f11,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // get remainder
+ fnma.s.s0 f8=f9,f11,f8
+ nop.i 0;;
+}
+
+
+
+ {.mfi
+ nop.m 0
+ // f12=0?
+ // This can be removed if sign of 0 is not important
+ fcmp.eq.unc.s1 p8,p0=f12,f0
+ nop.i 0;;
+}
+ {.mfb
+ nop.m 0
+ // if f8=0, set sign correctly
+ // This can be removed if sign of 0 is not important
+ (p8) fmerge.s f8=f7,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+FREM_X_NAN_INF: // reached from the entry code when a (f8) is NaN or infinity
+
+// Y zero ? (copy b through sf1, then compare with 0)
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt FREM_Y_ZERO;;
+}
+
+// X infinity? p8 (narrowed below to: X infinite and Y not NaN)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p8,p0 = f8, 0x23
+ nop.i 999
+}
+// X infinity? p11 (selects the error-return exit at the end of this section)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ? (p8 stays set only when Y is not a NaN)
+{.mfi
+ nop.m 999
+(p8) fclass.m.unc p0,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fma.s0 f9=f9,f1,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p7 = f8,f8 // inf/inf: presumably produces the QNaN indefinite and raises Invalid - TODO confirm
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+(p11) mov f10=f8 // FR_X (f10) = current f8, stored as Parameter 1 on the error path
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s.s0 f8=f8,f1,f0
+ nop.i 0 ;;
+}
+
+{ .mfb
+ nop.m 999
+ frcpa.s0 f8,p7=f8,f9 // result candidate f8/f9; at least one operand is a NaN on every path reaching here
+ (p11) br.cond.spnt EXP_ERROR_RETURN;;
+}
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.ret.spnt b0 ;;
+}
+
+
+FREM_Y_NAN_INF_ZERO: // reached from the entry code when b (f9) is NaN, infinity or zero
+
+// Y INF? p7: return a (as a*1+0, rounded to single)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN? p9: return b (as b*1+0); otherwise fall through to the code that follows
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p0 = f9, 0xc3
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.s.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+FREM_Y_ZERO:
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Build the NaN result in f8, then fall through to EXP_ERROR_RETURN
+
+// X NAN? p9 = yes, p10 = no
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff // p9 also if X is in none of the listed classes (presumably NaTVal/unsupported)
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa.s0 f11,p7=f8,f0 // X/0 with X NaN-like: presumably propagates X - TODO confirm
+ nop.i 0;;
+}
+
+{ .mfi
+ nop.m 999
+(p10) frcpa.s0 f11,p7 = f0,f0 // 0/0: QNAN indefinite
+nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8, f8 // FR_X (f10) = original a, for the error handler
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s.s0 f8=f11,f1,f0 // FR_RESULT (f8) = f11 rounded to single
+ nop.i 999
+}
+
+
+EXP_ERROR_RETURN: // common exit for the invalid-operand cases of remainderf()
+
+{ .mib
+ mov GR_Parameter_TAG = 125 // error tag handed to __libm_error_support in r40
+ nop.i 999
+ br.sptk __libm_error_region;; // plain branch, not a call: the stub returns to our caller through b0
+}
+
+GLOBAL_IEEE754_END(remainderf)
+libm_alias_float_other (__remainder, remainder)
+weak_alias (__remainderf, dremf)
+
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue // error stub: spill FR_X, FR_Y and FR_RESULT as floats into a 64-byte frame, call __libm_error_support, reload f8
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (equals new sp+32 once the frame below is created)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new stack
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 on stack; pointer post-increments to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support#;; // Call error handling function
+}
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Re-set the result address, which was overwritten (see History 08/15/00)
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_remainderl.S b/sysdeps/ia64/fpu/e_remainderl.S
new file mode 100644
index 0000000000..f411d80906
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_remainderl.S
@@ -0,0 +1,614 @@
+.file "remainderl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//====================================================================
+// 02/02/00 Initial version
+// 03/02/00 New algorithm
+// 04/04/00 Unwind support added
+// 07/21/00 Fixed quotient=2^{24*m+23}*1.q1...q23 1 bug
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 11/29/00 Set FR_Y to f9
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+// API
+//====================================================================
+// long double remainderl(long double,long double);
+//
+// Overview of operation
+//====================================================================
+// remainder(a,b)=a-i*b,
+// where i is an integer such that, if b!=0 and a is finite,
+// |a/b-i|<=1/2. If |a/b-i|=1/2, i is even.
+//
+// Algorithm
+//====================================================================
+// a). eliminate special cases
+// b). if |a/b|<0.25 (first quotient estimate), return a
+// c). use single precision divide algorithm to get quotient q
+// rounded to 24 bits of precision
+// d). calculate partial remainders (using both q and q-ulp);
+// select one and RZ(a/b) based on the sign of |a|-|b|*q
+// e). if the exponent difference (exponent(a)-exponent(b))
+// is less than 24 (quotient estimate<2^{24}-2), use RZ(a/b)
+// and sticky bits to round to integer; exit loop and
+// calculate final remainder
+// f). if exponent(a)-exponent(b)>=24, select new value of a as
+// the partial remainder calculated using RZ(a/b);
+// repeat from c).
+//
+// Special cases
+//====================================================================
+// a=+/- Inf, or b=+/-0: return NaN, call libm_error_support
+// a=NaN or b=NaN: return NaN
+//
+// Registers used
+//====================================================================
+// Predicate registers: p6-p14
+// General registers: r2,r3,r28,r29,r32 (ar.pfs), r33-r39
+// Floating point registers: f6-f15,f32
+//
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+GR_SAVE_SP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f10
+FR_Y = f9
+FR_RESULT = f8
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(remainderl)
+
+// inputs in f8, f9
+// result in f8
+
+{ .mfi
+ alloc r32=ar.pfs,1,4,4,0
+ // f13=|a|
+ fmerge.s f13=f0,f8
+ nop.i 0
+}
+ {.mfi
+ getf.sig r29=f9
+ // f14=|b|
+ fmerge.s f14=f0,f9
+ nop.i 0;;
+}
+ {.mlx
+ mov r28=0x2ffdd
+ // r3=2^{23}
+ movl r3=0x4b000000;;
+}
+
+
+{.mmi
+setf.exp f32=r28
+nop.m 0
+// y pseudo-zero ?
+cmp.eq p11,p10=r29,r0;;
+}
+
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ nop.m 999
+(p10) fclass.m p11,p10 = f9, 0xe7
+ nop.i 999
+}
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p8 = f8, 0xe3
+ nop.i 999;;
+}
+
+{.mfi
+ nop.m 0
+ mov f12=f0
+ nop.i 0
+}
+{ .mfi
+ // set p7=1
+ cmp.eq.unc p7,p0=r0,r0
+ // Step (1)
+ // y0 = 1 / b in f10
+ frcpa.s1 f10,p6=f13,f14
+ nop.i 0;;
+}
+// Y +-NAN, +-inf, +-0? p11
+{ .mfi
+ nop.m 999
+ // pseudo-NaN ?
+(p10) fclass.nm p11,p0 = f9, 0xff
+ nop.i 999
+}
+
+// qnan snan inf norm unorm 0 -+
+// 1 1 1 0 0 0 11
+// e 3
+// X +-NAN, +-inf, ? p9
+
+{ .mfi
+ nop.m 999
+(p8) fclass.nm p9,p0 = f8, 0xff
+ nop.i 999;;
+}
+
+{.bbb
+ (p9) br.cond.spnt FREM_X_NAN_INF
+ (p11) br.cond.spnt FREM_Y_NAN_INF_ZERO
+ nop.b 0
+} {.mfi
+ nop.m 0
+ // set D flag if a (f8) is denormal
+ fnma.s0 f6=f8,f1,f8
+ nop.i 0;;
+}
+
+remloop24:
+ { .mfi
+ nop.m 0
+ // Step (2)
+ // q0 = a * y0 in f12
+ (p6) fma.s1 f12=f13,f10,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // e0 = 1 - b * y0 in f7
+ (p6) fnma.s1 f7=f14,f10,f1
+ nop.i 0;;
+} {.mlx
+ nop.m 0
+ // r2=1.25*2^{-24}
+ movl r2=0x33a00000;;
+}
+
+{.mfi
+ nop.m 0
+ // q1=q0*(1+e0)
+ (p6) fma.s1 f15=f12,f7,f12
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ // Step (4)
+ // e1 = e0 * e0 + E in f7
+ (p6) fma.s1 f7=f7,f7,f32
+ nop.i 0;;
+}
+ {.mii
+ (p7) getf.exp r29=f12
+ (p7) mov r28=0xfffd
+ nop.i 0;;
+}
+
+ { .mfi
+ // f12=2^{23}
+ setf.s f12=r3
+ // Step (5)
+ // q2 = q1 + e1 * q1 in f11
+ (p6) fma.s.s1 f11=f7,f15,f15
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // q2 = q1 + e1 * q1 in f6
+ (p6) fma.s1 f6=f7,f15,f15
+ nop.i 0;;
+}
+
+ {.mmi
+ // f15=1.25*2^{-24}
+ setf.s f15=r2
+ // q<1/4 ? (i.e. expon< -2)
+ (p7) cmp.gt p7,p0=r28,r29
+ nop.i 0;;
+}
+
+{.mfb
+ // r29= -32+bias
+ mov r29=0xffdf
+ // if |a/b|<1/4, set D flag before returning
+ (p7) fma.s0 f9=f9,f0,f8
+ nop.b 0;;
+}
+ {.mfb
+ nop.m 0
+ // can be combined with bundle above if sign of 0 or
+ // FTZ enabled are not important
+ (p7) fmerge.s f8=f8,f9
+ // return if 4*|a|<|b| (estimated quotient < 1/4)
+ (p7) br.ret.spnt b0;;
+}
+ {.mfi
+ // f7=2^{-32}
+ setf.exp f7=r29
+ // set f8 to current a value | sign
+ fmerge.s f8=f8,f13
+ nop.i 0;;
+}
+ {.mfi
+ getf.exp r28=f6
+ // last step ? (q<2^{23})
+ fcmp.lt.unc.s1 p0,p12=f6,f12
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r=a-b*q
+ fnma.s1 f6=f14,f11,f13
+ nop.i 0
+} {.mfi
+ // r2=23+bias
+ mov r2=0xffff+23
+ // q'=q-q*(1.25*2^{-24}) (q'=q-ulp)
+ fnma.s.s1 f15=f11,f15,f11
+ nop.i 0;;
+}
+ {.mmi
+ nop.m 0
+ cmp.eq p11,p14=r2,r28
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p11,p14
+ {.mfi
+ nop.m 0
+ // if exp_q=2^23, then r=a-b*2^{23}
+ (p11) fnma.s1 f13=f12,f14,f13
+ nop.i 0
+}
+{.mfi
+ nop.m 0
+ // r2=a-b*q'
+ (p14) fnma.s1 f13=f14,f15,f13
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // r>0 iff q=RZ(a/b) and inexact
+ fcmp.gt.unc.s1 p8,p0=f6,f0
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // r<0 iff q'=RZ(a/b) and inexact
+ (p14) fcmp.lt.unc.s1 p9,p10=f6,f0
+ nop.i 0;;
+}
+
+.pred.rel "mutex",p8,p9
+ {.mfi
+ nop.m 0
+ // (p8) Q=q+(last iteration ? sticky bits:0)
+ // i.e. Q=q+q*x (x=2^{-32} or 0)
+ (p8) fma.s1 f11=f11,f7,f11
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // (p9) Q=q'+(last iteration ? sticky bits:0)
+ // i.e. Q=q'+q'*x (x=2^{-32} or 0)
+ (p9) fma.s1 f11=f15,f7,f15
+ nop.i 0;;
+}
+
+ {.mfb
+ nop.m 0
+ // (p9) set r=r2 (new a, if not last iteration)
+ // (p10) new a =r
+ (p10) mov f13=f6
+ (p12) br.cond.sptk remloop24;;
+}
+
+// last iteration
+ {.mfi
+ nop.m 0
+ // set f9=|b|*sgn(a)
+ fmerge.s f9=f8,f9
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // round to integer
+ fcvt.fx.s1 f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // save sign of a
+ fmerge.s f7=f8,f8
+ nop.i 0
+} {.mfi
+ nop.m 0
+ // normalize
+ fcvt.xf f11=f11
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // This can be removed if sign of 0 is not important
+ // get remainder using sf1
+ fnma.s1 f12=f9,f11,f8
+ nop.i 0
+}
+ {.mfi
+ nop.m 0
+ // get remainder
+ fnma.s0 f8=f9,f11,f8
+ nop.i 0;;
+}
+ {.mfi
+ nop.m 0
+ // f12=0?
+ // This can be removed if sign of 0 is not important
+ fcmp.eq.unc.s1 p8,p0=f12,f0
+ nop.i 0;;
+}
+ {.mfb
+ nop.m 0
+ // if f8=0, set sign correctly
+ // This can be removed if sign of 0 is not important
+ (p8) fmerge.s f8=f7,f8
+ // return
+ br.ret.sptk b0;;
+}
+
+
+
+FREM_X_NAN_INF:
+
+// Y zero ?
+{.mfi
+ nop.m 0
+ fma.s1 f10=f9,f1,f0
+ nop.i 0;;
+}
+{.mfi
+ nop.m 0
+ fcmp.eq.unc.s1 p11,p0=f10,f0
+ nop.i 0;;
+}
+{.mib
+ nop.m 0
+ nop.i 0
+ // if Y zero
+ (p11) br.cond.spnt FREM_Y_ZERO;;
+}
+
+// X infinity? Return QNAN indefinite
+{ .mfi
+ nop.m 999
+ fclass.m.unc p8,p0 = f8, 0x23
+ nop.i 999
+}
+// Same X-infinity test again, into p11 (gates the error return below)
+{ .mfi
+ nop.m 999
+ fclass.m.unc p11,p0 = f8, 0x23
+ nop.i 999;;
+}
+// Y NaN ?
+{.mfi
+ nop.m 999
+(p8) fclass.m.unc p0,p8=f9,0xc3
+ nop.i 0;;
+}
+{.mfi
+ nop.m 999
+ // also set Denormal flag if necessary
+(p8) fnma.s0 f9=f9,f1,f9
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) frcpa.s0 f8,p7 = f8,f8
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+(p11) mov f10=f8
+ nop.i 0
+}
+{ .mfi
+ nop.m 999
+(p8) fma.s0 f8=f8,f1,f0
+ nop.i 0 ;;
+}
+
+{ .mfb
+ nop.m 999
+ frcpa.s0 f8,p7=f8,f9
+ (p11) br.cond.spnt EXP_ERROR_RETURN;;
+}
+{ .mib
+ nop.m 0
+ nop.i 0
+ br.ret.spnt b0 ;;
+}
+
+
+FREM_Y_NAN_INF_ZERO:
+// Y INF
+{ .mfi
+ nop.m 999
+ fclass.m.unc p7,p0 = f9, 0x23
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p7) fma.s0 f8=f8,f1,f0
+(p7) br.ret.spnt b0 ;;
+}
+
+// Y NAN?
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p10 = f9, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p0 = f9, 0xff
+ nop.i 999 ;;
+}
+
+{ .mfb
+ nop.m 999
+(p9) fma.s0 f8=f9,f1,f0
+(p9) br.ret.spnt b0 ;;
+}
+
+FREM_Y_ZERO:
+// Y zero? Must be zero at this point
+// because it is the only choice left.
+// Return QNAN indefinite
+
+// X NAN?
+{ .mfi
+ nop.m 999
+ fclass.m.unc p9,p10 = f8, 0xc3
+ nop.i 999 ;;
+}
+{ .mfi
+ nop.m 999
+(p10) fclass.nm p9,p10 = f8, 0xff
+ nop.i 999 ;;
+}
+
+{.mfi
+ nop.m 999
+ (p9) frcpa.s0 f11,p7=f8,f0
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 999
+(p10) frcpa.s0 f11,p7 = f0,f0
+ nop.i 999;;
+}
+
+{ .mfi
+ nop.m 999
+ fmerge.s f10 = f8, f8
+ nop.i 999
+}
+
+{ .mfi
+ nop.m 999
+ fma.s0 f8=f11,f1,f0
+ nop.i 999;;
+}
+
+EXP_ERROR_RETURN:
+
+{ .mib
+ mov GR_Parameter_TAG = 123
+ nop.i 999
+ br.sptk __libm_error_region;;
+}
+
+GLOBAL_IEEE754_END(remainderl)
+libm_alias_ldouble_other (__remainder, remainder)
+weak_alias (__remainderl, dreml)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp-32 (= new sp+32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 at sp+32, then advance to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address (sp+16)
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address (sp+48)
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address (sp+32)
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Result slot address again (sp+48)
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalb.S b/sysdeps/ia64/fpu/e_scalb.S
new file mode 100644
index 0000000000..c7b0e46b45
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_scalb.S
@@ -0,0 +1,598 @@
+.file "scalb.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 01/26/01 Scalb completely reworked and now standalone version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
+//
+// API
+//==============================================================
+// double = scalb (double x, double n)
+// input floating point f8 and floating point f9
+// output floating point f8
+//
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x103fe -> Certain overflow
+// exp_Result = 0x103fe -> Possible overflow
+// 0x0fc01 <= exp_Result < 0x103fe -> No over/underflow (main path)
+// 0x0fc01 - 52 <= exp_Result < 0x0fc01 -> Possible underflow
+// exp_Result < 0x0fc01 - 52 -> Certain underflow
+
+FR_Big = f6 // Positive overflow bound (built from GR_pos_ov_limit)
+FR_NBig = f7 // Negative overflow bound (built from GR_neg_ov_limit)
+FR_Floating_X = f8 // Input x
+FR_Result = f8 // Return value (same register as input x)
+FR_Floating_N = f9 // Input n
+FR_Result2 = f9 // Result computed under status field s2 (reuses n)
+FR_Result3 = f10 // Result computed under status field s3
+FR_Norm_X = f11 // Normalized x
+FR_Two_N = f12 // 2**N
+FR_N_float_int = f13 // n truncated to an integer, later converted back to FP
+FR_Norm_N = f14 // Normalized n
+
+GR_neg_ov_limit= r14 // Sign+exponent for FR_NBig (shares r14 with GR_big_exp)
+GR_big_exp = r14 // Exponent at which n is necessarily an integer
+GR_N_Biased = r15 // N + GR_Bias, fed to setf.exp
+GR_Big = r16 // +/-35000 clamp applied to out-of-range N
+GR_exp_Result = r18 // exp_X + N
+GR_pos_ov_limit= r19 // Exponent for FR_Big (shares r19 with GR_exp_sure_ou)
+GR_exp_sure_ou = r19 // Exponent of n beyond which over/underflow is certain
+GR_Bias = r20 // Exponent bias 0xffff
+GR_N_as_int = r21 // N as a 64-bit integer
+GR_signexp_X = r22 // Sign and exponent field of x
+GR_exp_X = r23 // Exponent of x (sign masked off)
+GR_exp_mask = r24 // 0x1ffff
+GR_max_exp = r25 // Largest result exponent that cannot overflow
+GR_min_exp = r26 // Smallest normal result exponent
+GR_min_den_exp = r27 // Smallest denormal result exponent
+GR_Scratch = r28 // Holds 1<<63 in the NaN/Inf/Zero path
+GR_signexp_N = r29 // Sign and exponent field of n
+GR_exp_N = r30 // Exponent of n (sign masked off)
+
+GR_SAVE_B0 = r32 // r32-r34: frame registers used by the error path
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35 // r35-r38: outgoing arguments for __libm_error_support
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38 // Error tag selected by the overflow/underflow stubs
+
+.section .text
+GLOBAL_IEEE754_ENTRY(scalb)
+
+// Entry: x arrives in FR_Floating_X and n in FR_Floating_N.
+// Is x NAN, INF, ZERO, +-? (answer kept in p6)
+// Build the exponent Bias
+//
+{ .mfi
+ getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff // Exponent bias
+}
+{ .mfi
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
+ nop.i 0
+}
+//
+// Normalize n
+//
+{ .mfi
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 0
+}
+;;
+
+//
+// Is n NAN, INF, ZERO, +-? (answer kept in p9)
+//
+{ .mfi
+ mov GR_big_exp = 0x1003e // Exponent at which n is integer
+ fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_max_exp = 0x103fe // Exponent of maximum double
+}
+//
+// Normalize x
+//
+{ .mfb
+ nop.m 0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt SCALB_N_UNORM // Branch if n=unorm
+}
+;;
+
+SCALB_COMMON1:
+// Main path continues. Also return here from n=unorm path.
+// Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+ nop.m 0
+ fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
+(p6) br.cond.spnt SCALB_NAN_INF_ZERO // p6: x is NaN, Inf or Zero
+}
+;;
+
+// Handle special cases if n = Nan, Inf, Zero
+{ .mfi
+ getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
+ fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+ mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{ .mfb
+ mov GR_min_exp = 0x0fc01 // Exponent of minimum double
+ fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt SCALB_NAN_INF_ZERO // p9: n is NaN, Inf or Zero
+}
+;;
+
+{ .mmi
+ and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub GR_Big = r0, GR_Big // Limit for N becomes -35000 when N is negative
+ nop.i 0
+}
+;;
+
+{ .mib
+ cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+ cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt SCALB_X_UNORM // Branch if x=unorm
+}
+;;
+
+SCALB_COMMON2:
+// Main path continues. Also return here from x=unorm path.
+// Create biased exponent for 2**N
+{ .mmi
+(p6) mov GR_N_as_int = GR_Big // Limit N to +/-35000 when over/underflow is certain
+;;
+ add GR_N_Biased = GR_Bias,GR_N_as_int // Biased exponent of 2**N
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
+ and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
+}
+;;
+
+//
+// Compute biased result exponent
+// Branch if N is not an integer
+//
+{ .mib
+ add GR_exp_Result = GR_exp_X, GR_N_as_int // exp_Result = exp_X + N
+ mov GR_min_den_exp = 0x0fc01 - 52 // Exponent of min denorm dble
+(p9) br.cond.spnt SCALB_N_NOT_INT
+}
+;;
+
+//
+// Raise Denormal operand flag with compare
+// Do final operation
+//
+{ .mfi
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
+}
+{ .mfb
+ nop.m 0
+ fma.d.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 // Result = 2**N * x, rounded to double
+(p9) br.cond.spnt SCALB_UNDERFLOW // Branch if certain underflow
+}
+;;
+
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
+
+{ .bbb
+(p6) br.cond.spnt SCALB_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALB_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALB_POSSIBLE_UNDERFLOW // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0fc01-52 <= exp_Result < 0x0fc01
+SCALB_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x103fe = exp_Result
+SCALB_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields (both cases share the code below)
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ mov GR_pos_ov_limit = 0x103ff // Exponent for positive overflow
+ fsetc.s3 0x7F,0x41 // S3 = user status + FZ + TD
+ nop.i 0
+}
+{ .mfi
+ mov GR_neg_ov_limit = 0x303ff // Exponent for negative overflow
+ fsetc.s2 0x7F,0x42 // S2 = user status + WRE + TD
+ nop.i 0
+}
+;;
+
+//
+// Do final operation with s2 and s3
+//
+{ .mfi
+ setf.exp FR_NBig = GR_neg_ov_limit // Negative overflow bound
+ fma.d.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+{ .mfi
+ setf.exp FR_Big = GR_pos_ov_limit // Positive overflow bound
+ fma.d.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40 // S3 back to user status + TD
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // S2 back to user status + TD
+ nop.i 0
+}
+;;
+
+//
+// Is the result zero? (p6 = the S3 result is +0 or -0)
+//
+{ .mfi
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big // p7 = positive overflow
+ nop.i 0
+}
+;;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 0
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // Keep p6 only if S0 and S2 results differ
+ nop.i 0
+}
+;;
+
+//
+// Is the result beyond the allowed range on the negative side?
+// Branch out for underflow
+//
+{ .mfb
+ nop.m 0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9 = negative overflow
+(p6) br.cond.spnt SCALB_UNDERFLOW
+}
+;;
+
+//
+// Branch out for overflow
+//
+{ .bbb
+(p7) br.cond.spnt SCALB_OVERFLOW
+(p9) br.cond.spnt SCALB_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
+
+// Here if result overflows
+SCALB_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0 // Frame: r32-r34 plus four outputs r35-r38
+ addl GR_Tag = 53, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALB_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0 // Frame: r32-r34 plus four outputs r35-r38
+ addl GR_Tag = 54, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+SCALB_NAN_INF_ZERO:
+
+// Reached when x or n is NaN, Inf or Zero.
+// Before entry, N has been converted to a fp integer in significand of
+// FR_N_float_int
+// NOTE(review): the n-special entry already ran fcvt.xf once; confirm repeat is harmless
+// Convert N_float_int to floating point value
+//
+{ .mfi
+ getf.sig GR_N_as_int = FR_N_float_int
+ fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
+}
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63 // GR_Scratch = 1<<63
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p8,p0 = FR_Floating_N, 0x21 // @pos | @inf
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fclass.m p9,p0 = FR_Floating_N, 0x22 // @neg | @inf
+ nop.i 0
+}
+;;
+
+//
+// Either X or N is a NaN: return N*X and possibly raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p6) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p6) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ nop.m 0
+(p7) fma.d.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p7) br.ret.spnt b0
+}
+;;
+
+//
+// If N = +Inf, return x * N
+// For N = -Inf, negate N to form +Inf
+//
+{ .mfb
+ nop.m 0
+(p8) fma.d.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 0
+(p9) fnma.d.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+}
+;;
+
+//
+// If N==-Inf,return x/(-N)
+//
+{ .mfb
+ cmp.ne p7,p0 = GR_N_as_int,GR_Scratch // p7 = integer N is not 1<<63
+(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+}
+;;
+
+//
+// Is N an integer? (p7 stays set only if N differs from its truncation)
+//
+{ .mfi
+ nop.m 0
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+}
+;;
+
+//
+// If N not an int, return NaN and raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p0 = f0,f0 // 0/0
+(p7) br.ret.spnt b0
+}
+;;
+
+//
+// Always return x in other path.
+//
+{ .mfb
+ nop.m 0
+ fma.d.s0 FR_Result = FR_Floating_X,f1,f0 // Result = x * 1 + 0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n is not an integer
+// Return NaN and raise invalid.
+SCALB_N_NOT_INT:
+{ .mfb
+ nop.m 0
+ frcpa.s0 FR_Result,p0 = f0,f0 // 0/0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n=unorm: redo the exponent read and truncation on normalized n
+SCALB_N_UNORM:
+{ .mfb
+ getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
+ br.cond.sptk SCALB_COMMON1 // Return to main path
+}
+;;
+
+// Here if x=unorm: redo the exponent read on normalized x
+SCALB_X_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALB_COMMON2 // Return to main path
+}
+;;
+
+GLOBAL_IEEE754_END(scalb)
+LOCAL_LIBM_ENTRY(__libm_error_region)
+
+// Error path: spill n, x and the tentative result into a 64-byte frame
+// and call __libm_error_support. First get the stack address of N
+// (entry sp-32, which is sp+32 once the frame is allocated).
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+//
+// Adjust sp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+//
+// Store N on stack in correct position (sp+32)
+// Locate the address of x on stack (sp+16)
+//
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Norm_N,16 // Post-increment leaves sp+48
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack (sp+48).
+//
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_Result // Store tentative result at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the address of N (sp+32)
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack
+//
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfd FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp // Release the frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbf.S b/sysdeps/ia64/fpu/e_scalbf.S
new file mode 100644
index 0000000000..ce487e2a9b
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_scalbf.S
@@ -0,0 +1,598 @@
+.file "scalbf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 01/26/01 Scalb completely reworked and now standalone version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
+//
+// API
+//==============================================================
+// float = scalbf (float x, float n)
+// input floating point f8 and floating point f9
+// output floating point f8
+//
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x1007e -> Certain overflow
+// exp_Result = 0x1007e -> Possible overflow
+// 0x0ff81 <= exp_Result < 0x1007e -> No over/underflow (main path)
+// 0x0ff81 - 23 <= exp_Result < 0x0ff81 -> Possible underflow
+// exp_Result < 0x0ff81 - 23 -> Certain underflow
+
+FR_Big = f6 // Positive overflow bound (built from GR_pos_ov_limit)
+FR_NBig = f7 // Negative overflow bound (built from GR_neg_ov_limit)
+FR_Floating_X = f8 // Input x
+FR_Result = f8 // Return value (same register as input x)
+FR_Floating_N = f9 // Input n
+FR_Result2 = f9 // Result computed under status field s2 (reuses n)
+FR_Result3 = f10 // Result computed under status field s3
+FR_Norm_X = f11 // Normalized x
+FR_Two_N = f12 // 2**N
+FR_N_float_int = f13 // n truncated to an integer, later converted back to FP
+FR_Norm_N = f14 // Normalized n
+
+GR_neg_ov_limit= r14 // Sign+exponent for FR_NBig (shares r14 with GR_big_exp)
+GR_big_exp = r14 // Exponent at which n is necessarily an integer
+GR_N_Biased = r15 // N + GR_Bias, fed to setf.exp
+GR_Big = r16 // +/-35000 clamp applied to out-of-range N
+GR_exp_Result = r18 // exp_X + N
+GR_pos_ov_limit= r19 // Exponent for FR_Big (shares r19 with GR_exp_sure_ou)
+GR_exp_sure_ou = r19 // Exponent of n beyond which over/underflow is certain
+GR_Bias = r20 // Exponent bias 0xffff
+GR_N_as_int = r21 // N as a 64-bit integer
+GR_signexp_X = r22 // Sign and exponent field of x
+GR_exp_X = r23 // Exponent of x (sign masked off)
+GR_exp_mask = r24 // 0x1ffff
+GR_max_exp = r25 // Largest result exponent that cannot overflow
+GR_min_exp = r26 // Smallest normal result exponent
+GR_min_den_exp = r27 // Smallest denormal result exponent
+GR_Scratch = r28 // Holds 1<<63 in the NaN/Inf/Zero path
+GR_signexp_N = r29 // Sign and exponent field of n
+GR_exp_N = r30 // Exponent of n (sign masked off)
+
+GR_SAVE_B0 = r32 // r32-r34: frame registers used by the error path
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35 // r35-r38: outgoing arguments for __libm_error_support
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38 // Error tag selected by the overflow/underflow stubs
+
+.section .text
+GLOBAL_IEEE754_ENTRY(scalbf)
+
+// Entry: x arrives in FR_Floating_X and n in FR_Floating_N.
+// Is x NAN, INF, ZERO, +-? (answer kept in p6)
+// Build the exponent Bias
+//
+{ .mfi
+ getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff // Exponent bias
+}
+{ .mfi
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
+ nop.i 0
+}
+//
+// Normalize n
+//
+{ .mfi
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 0
+}
+;;
+
+//
+// Is n NAN, INF, ZERO, +-? (answer kept in p9)
+//
+{ .mfi
+ mov GR_big_exp = 0x1003e // Exponent at which n is integer
+ fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_max_exp = 0x1007e // Exponent of maximum float
+}
+//
+// Normalize x
+//
+{ .mfb
+ nop.m 0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt SCALBF_N_UNORM // Branch if n=unorm
+}
+;;
+
+SCALBF_COMMON1:
+// Main path continues. Also return here from n=unorm path.
+// Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+ nop.m 0
+ fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
+(p6) br.cond.spnt SCALBF_NAN_INF_ZERO // p6: x is NaN, Inf or Zero
+}
+;;
+
+// Handle special cases if n = Nan, Inf, Zero
+{ .mfi
+ getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
+ fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+ mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{ .mfb
+ mov GR_min_exp = 0x0ff81 // Exponent of minimum float
+ fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt SCALBF_NAN_INF_ZERO // p9: n is NaN, Inf or Zero
+}
+;;
+
+{ .mmi
+ and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub GR_Big = r0, GR_Big // Limit for N becomes -35000 when N is negative
+ nop.i 0
+}
+;;
+
+{ .mib
+ cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+ cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt SCALBF_X_UNORM // Branch if x=unorm
+}
+;;
+
+SCALBF_COMMON2:
+// Main path continues. Also return here from x=unorm path.
+// Create biased exponent for 2**N
+{ .mmi
+(p6) mov GR_N_as_int = GR_Big // Limit N to +/-35000 when over/underflow is certain
+;;
+ add GR_N_Biased = GR_Bias,GR_N_as_int // Biased exponent of 2**N
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
+ and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
+}
+;;
+
+//
+// Compute biased result exponent
+// Branch if N is not an integer
+//
+{ .mib
+ add GR_exp_Result = GR_exp_X, GR_N_as_int // exp_Result = exp_X + N
+ mov GR_min_den_exp = 0x0ff81 - 23 // Exponent of min denorm float
+(p9) br.cond.spnt SCALBF_N_NOT_INT
+}
+;;
+
+//
+// Raise Denormal operand flag with compare
+// Do final operation
+//
+{ .mfi
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
+}
+{ .mfb
+ nop.m 0
+ fma.s.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 // Result = 2**N * x, rounded to float
+(p9) br.cond.spnt SCALBF_UNDERFLOW // Branch if certain underflow
+}
+;;
+
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
+
+{ .bbb
+(p6) br.cond.spnt SCALBF_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBF_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBF_POSSIBLE_UNDERFLOW // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0ff81-23 <= exp_Result < 0x0ff81
+SCALBF_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x1007e = exp_Result
+SCALBF_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields (both cases share the code below)
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ mov GR_pos_ov_limit = 0x1007f // Exponent for positive overflow
+ fsetc.s3 0x7F,0x41 // S3 = user status + FZ + TD
+ nop.i 0
+}
+{ .mfi
+ mov GR_neg_ov_limit = 0x3007f // Exponent for negative overflow
+ fsetc.s2 0x7F,0x42 // S2 = user status + WRE + TD
+ nop.i 0
+}
+;;
+
+//
+// Do final operation with s2 and s3
+//
+{ .mfi
+ setf.exp FR_NBig = GR_neg_ov_limit // Negative overflow bound
+ fma.s.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+{ .mfi
+ setf.exp FR_Big = GR_pos_ov_limit // Positive overflow bound
+ fma.s.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0
+ nop.i 0
+}
+;;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40 // S3 back to user status + TD
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // S2 back to user status + TD
+ nop.i 0
+}
+;;
+
+//
+// Is the result zero? (p6 = the S3 result is +0 or -0)
+//
+{ .mfi
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big // p7 = positive overflow
+ nop.i 0
+}
+;;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 0
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // Keep p6 only if S0 and S2 results differ
+ nop.i 0
+}
+;;
+
+//
+// Is the result beyond the allowed range on the negative side?
+// Branch out for underflow
+//
+{ .mfb
+ nop.m 0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9 = negative overflow
+(p6) br.cond.spnt SCALBF_UNDERFLOW
+}
+;;
+
+//
+// Branch out for overflow
+//
+{ .bbb
+(p7) br.cond.spnt SCALBF_OVERFLOW
+(p9) br.cond.spnt SCALBF_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
+
+// Here if result overflows
+SCALBF_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0 // Frame: r32-r34 plus four outputs r35-r38
+ addl GR_Tag = 55, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALBF_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0 // Frame: r32-r34 plus four outputs r35-r38
+ addl GR_Tag = 56, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+SCALBF_NAN_INF_ZERO:
+
+// Reached when x or n is NaN, Inf or Zero.
+// Before entry, N has been converted to a fp integer in significand of
+// FR_N_float_int
+// NOTE(review): the n-special entry already ran fcvt.xf once; confirm repeat is harmless
+// Convert N_float_int to floating point value
+//
+{ .mfi
+ getf.sig GR_N_as_int = FR_N_float_int
+ fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
+}
+{ .mfi
+ addl GR_Scratch = 1,r0
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63 // GR_Scratch = 1<<63
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p8,p0 = FR_Floating_N, 0x21 // @pos | @inf
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fclass.m p9,p0 = FR_Floating_N, 0x22 // @neg | @inf
+ nop.i 0
+}
+;;
+
+//
+// Either X or N is a NaN: return N*X and possibly raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p6) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p6) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ nop.m 0
+(p7) fma.s.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p7) br.ret.spnt b0
+}
+;;
+
+//
+// If N = +Inf, return x * N
+// For N = -Inf, negate N to form +Inf
+//
+{ .mfb
+ nop.m 0
+(p8) fma.s.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 0
+(p9) fnma.s.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+}
+;;
+
+//
+// If N==-Inf,return x/(-N)
+//
+{ .mfb
+ cmp.ne p7,p0 = GR_N_as_int,GR_Scratch // p7 = integer N is not 1<<63
+(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+}
+;;
+
+//
+// Is N an integer? (p7 stays set only if N differs from its truncation)
+//
+{ .mfi
+ nop.m 0
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+}
+;;
+
+//
+// If N not an int, return NaN and raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p0 = f0,f0 // 0/0
+(p7) br.ret.spnt b0
+}
+;;
+
+//
+// Always return x in other path.
+//
+{ .mfb
+ nop.m 0
+ fma.s.s0 FR_Result = FR_Floating_X,f1,f0 // Result = x * 1 + 0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n is not an integer
+// Return NaN and raise invalid.
+SCALBF_N_NOT_INT:
+{ .mfb
+ nop.m 0
+ frcpa.s0 FR_Result,p0 = f0,f0 // 0/0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n=unorm: redo the exponent read and truncation on normalized n
+SCALBF_N_UNORM:
+{ .mfb
+ getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
+ br.cond.sptk SCALBF_COMMON1 // Return to main path
+}
+;;
+
+// Here if x=unorm: redo the exponent read on normalized x
+SCALBF_X_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBF_COMMON2 // Return to main path
+}
+;;
+
+GLOBAL_IEEE754_END(scalbf)
+LOCAL_LIBM_ENTRY(__libm_error_region)
+
+// Error path: spill n, x and the tentative result into a 64-byte frame
+// and call __libm_error_support. First get the stack address of N
+// (entry sp-32, which is sp+32 once the frame is allocated).
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+//
+// Adjust sp
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+//
+// Store N on stack in correct position (sp+32)
+// Locate the address of x on stack (sp+16)
+//
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Norm_N,16 // Post-increment leaves sp+48
+ add GR_Parameter_X = 16,sp
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack (sp+48).
+//
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_Result // Store tentative result at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to the address of N (sp+32)
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack
+//
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
+};;
+
+//
+// Get the new result
+//
+{ .mmi
+ ldfs FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp // Release the frame
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_scalbl.S b/sysdeps/ia64/fpu/e_scalbl.S
new file mode 100644
index 0000000000..76b24dae07
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_scalbl.S
@@ -0,0 +1,598 @@
+.file "scalbl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 01/26/01 Scalb completely reworked and now standalone version
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+// 08/06/03 Improved performance
+//
+// API
+//==============================================================
+// long double = scalbl (long double x, long double n)
+// input floating point f8 and floating point f9
+// output floating point f8
+//
+// int_type = 0 if int is 32 bits
+// int_type = 1 if int is 64 bits
+//
+// Returns x* 2**n using an fma and detects overflow
+// and underflow.
+//
+//
+// Strategy:
+// Compute biased exponent of result exp_Result = N + exp_X
+// Break into ranges:
+// exp_Result > 0x13ffe -> Certain overflow
+// exp_Result = 0x13ffe -> Possible overflow
+// 0x0c001 <= exp_Result < 0x13ffe -> No over/underflow (main path)
+// 0x0c001 - 63 <= exp_Result < 0x0c001 -> Possible underflow
+// exp_Result < 0x0c001 - 63 -> Certain underflow
+
+FR_Big = f6
+FR_NBig = f7
+FR_Floating_X = f8
+FR_Result = f8
+FR_Floating_N = f9
+FR_Result2 = f9
+FR_Result3 = f10
+FR_Norm_X = f11
+FR_Two_N = f12
+FR_N_float_int = f13
+FR_Norm_N = f14
+
+GR_neg_ov_limit= r14
+GR_big_exp = r14
+GR_N_Biased = r15
+GR_Big = r16
+GR_exp_Result = r18
+GR_pos_ov_limit= r19
+GR_exp_sure_ou = r19
+GR_Bias = r20
+GR_N_as_int = r21
+GR_signexp_X = r22
+GR_exp_X = r23
+GR_exp_mask = r24
+GR_max_exp = r25
+GR_min_exp = r26
+GR_min_den_exp = r27
+GR_Scratch = r28
+GR_signexp_N = r29
+GR_exp_N = r30
+
+GR_SAVE_B0 = r32
+GR_SAVE_GP = r33
+GR_SAVE_PFS = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Tag = r38
+
+.section .text
+GLOBAL_IEEE754_ENTRY(scalbl)
+
+// Entry: x in f8, n in f9. Classify both operands and start converting n.
+// Is x NAN, INF, ZERO, +-? (p6, consumed at SCALBL_COMMON1)
+// Build the exponent Bias
+//
+{ .mfi
+ getf.exp GR_signexp_N = FR_Floating_N // Get signexp of n
+ fclass.m p6,p0 = FR_Floating_X, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_Bias = 0x0ffff // Bias added to N before setf.exp
+}
+{ .mfi
+ mov GR_Big = 35000 // If N this big then certain overflow
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Floating_N // Get N in significand
+ nop.i 0
+}
+;;
+
+{ .mfi
+ getf.exp GR_signexp_X = FR_Floating_X // Get signexp of x
+ fclass.m p7,p0 = FR_Floating_N, 0x0b // Test for n=unorm
+ nop.i 0
+}
+//
+// Normalize n (used by the unorm path and the is-N-an-integer compare)
+//
+{ .mfi
+ mov GR_exp_mask = 0x1ffff // Exponent mask
+ fnorm.s1 FR_Norm_N = FR_Floating_N
+ nop.i 0
+}
+;;
+
+//
+// Is n NAN, INF, ZERO, +-? (p9, consumed at SCALBL_COMMON1)
+//
+{ .mfi
+ mov GR_big_exp = 0x1003e // Exponent at which n is integer
+ fclass.m p9,p0 = FR_Floating_N, 0xe7 // @snan | @qnan | @inf | @zero
+ mov GR_max_exp = 0x13ffe // Exponent of maximum long double
+}
+//
+// Normalize x (the final fma multiplies FR_Norm_X by 2**N)
+//
+{ .mfb
+ nop.m 0
+ fnorm.s1 FR_Norm_X = FR_Floating_X
+(p7) br.cond.spnt SCALBL_N_UNORM // Branch if n=unorm
+}
+;;
+
+SCALBL_COMMON1:
+// Main path continues. Also return here from n=unorm path.
+// Handle special cases if x = Nan, Inf, Zero
+{ .mfb
+ nop.m 0
+ fcmp.lt.s1 p7,p0 = FR_Floating_N, f0 // Test N negative
+(p6) br.cond.spnt SCALBL_NAN_INF_ZERO // Branch if x special (p6 from entry)
+}
+;;
+
+// Handle special cases if n = Nan, Inf, Zero
+{ .mfi
+ getf.sig GR_N_as_int = FR_N_float_int // Get n from significand
+ fclass.m p8,p0 = FR_Floating_X, 0x0b // Test for x=unorm
+ mov GR_exp_sure_ou = 0x1000e // Exp_N where x*2^N sure over/under
+}
+{ .mfb
+ mov GR_min_exp = 0x0c001 // Exponent of minimum long double
+ fcvt.xf FR_N_float_int = FR_N_float_int // Convert N to FP integer
+(p9) br.cond.spnt SCALBL_NAN_INF_ZERO // Branch if n special (p9 from entry)
+}
+;;
+
+{ .mmi
+ and GR_exp_N = GR_exp_mask, GR_signexp_N // Get exponent of N
+(p7) sub GR_Big = r0, GR_Big // Negate limit for N when N < 0
+ nop.i 0
+}
+;;
+
+{ .mib
+ cmp.lt p9,p0 = GR_exp_N, GR_big_exp // N possible non-integer?
+ cmp.ge p6,p0 = GR_exp_N, GR_exp_sure_ou // N certain over/under?
+(p8) br.cond.spnt SCALBL_X_UNORM // Branch if x=unorm
+}
+;;
+
+SCALBL_COMMON2:
+// Main path continues. Also return here from x=unorm path.
+// Create biased exponent for 2**N
+{ .mmi
+(p6) mov GR_N_as_int = GR_Big // Limit N (clamp to +-35000 if sure over/under)
+;;
+ add GR_N_Biased = GR_Bias,GR_N_as_int
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp FR_Two_N = GR_N_Biased // Form 2**N
+(p9) fcmp.neq.unc.s1 p9,p0 = FR_Norm_N, FR_N_float_int // Test if N an integer
+ and GR_exp_X = GR_exp_mask, GR_signexp_X // Get exponent of X
+}
+;;
+
+//
+// Compute biased result exponent
+// Branch if N is not an integer
+//
+{ .mib
+ add GR_exp_Result = GR_exp_X, GR_N_as_int // exp_Result = exp_X + N
+ mov GR_min_den_exp = 0x0c001 - 63 // Exp of min denorm long dble
+(p9) br.cond.spnt SCALBL_N_NOT_INT
+}
+;;
+
+//
+// Raise Denormal operand flag with compare
+// Do final operation
+//
+{ .mfi
+ cmp.lt p7,p6 = GR_exp_Result, GR_max_exp // Test no overflow
+ fcmp.ge.s0 p0,p11 = FR_Floating_X,FR_Floating_N // Dummy to set denorm
+ cmp.lt p9,p0 = GR_exp_Result, GR_min_den_exp // Test sure underflow
+}
+{ .mfb
+ nop.m 0
+ fma.s0 FR_Result = FR_Two_N,FR_Norm_X,f0 // Result = 2**N * x
+(p9) br.cond.spnt SCALBL_UNDERFLOW // Branch if certain underflow
+}
+;;
+
+{ .mib
+(p6) cmp.gt.unc p6,p8 = GR_exp_Result, GR_max_exp // Test sure overflow
+(p7) cmp.ge.unc p7,p9 = GR_exp_Result, GR_min_exp // Test no over/underflow
+(p7) br.ret.sptk b0 // Return from main path
+}
+;;
+
+{ .bbb
+(p6) br.cond.spnt SCALBL_OVERFLOW // Branch if certain overflow
+(p8) br.cond.spnt SCALBL_POSSIBLE_OVERFLOW // Branch if possible overflow
+(p9) br.cond.spnt SCALBL_POSSIBLE_UNDERFLOW // Branch if possible underflow
+}
+;;
+
+// Here if possible underflow.
+// Resulting exponent: 0x0c001-63 <= exp_Result < 0x0c001
+SCALBL_POSSIBLE_UNDERFLOW:
+//
+// Here if possible overflow.
+// Resulting exponent: 0x13ffe = exp_Result
+SCALBL_POSSIBLE_OVERFLOW:
+
+// Set up necessary status fields (both labels share the code below)
+//
+// S0 user supplied status
+// S2 user supplied status + WRE + TD (Overflows)
+// S3 user supplied status + FZ + TD (Underflows)
+//
+{ .mfi
+ mov GR_pos_ov_limit = 0x13fff // Exponent for positive overflow
+ fsetc.s3 0x7F,0x41 // s3 = s0 controls + FZ + TD
+ nop.i 0
+}
+{ .mfi
+ mov GR_neg_ov_limit = 0x33fff // Exponent for negative overflow
+ fsetc.s2 0x7F,0x42 // s2 = s0 controls + WRE + TD
+ nop.i 0
+}
+;;
+
+//
+// Do final operation with s2 and s3
+//
+{ .mfi
+ setf.exp FR_NBig = GR_neg_ov_limit // Negative overflow threshold
+ fma.s3 FR_Result3 = FR_Two_N,FR_Norm_X,f0 // Result under s3
+ nop.i 0
+}
+{ .mfi
+ setf.exp FR_Big = GR_pos_ov_limit // Positive overflow threshold
+ fma.s2 FR_Result2 = FR_Two_N,FR_Norm_X,f0 // Result under s2
+ nop.i 0
+}
+;;
+
+// Check for overflow or underflow.
+// Restore s3
+// Restore s2
+//
+{ .mfi
+ nop.m 0
+ fsetc.s3 0x7F,0x40
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40
+ nop.i 0
+}
+;;
+
+//
+// Is the result zero?
+//
+{ .mfi
+ nop.m 0
+ fclass.m p6, p0 = FR_Result3, 0x007 // p6 if s3 result is +-0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p7, p8 = FR_Result2 , FR_Big // p7 if positive overflow
+ nop.i 0
+}
+;;
+
+//
+// Detect masked underflow - Tiny + Inexact Only
+//
+{ .mfi
+ nop.m 0
+(p6) fcmp.neq.unc.s1 p6, p0 = FR_Result , FR_Result2 // s0 and s2 results differ
+ nop.i 0
+}
+;;
+
+//
+// Is result bigger than the allowed range?
+// Branch out for underflow
+//
+{ .mfb
+ nop.m 0
+(p8) fcmp.le.unc.s1 p9, p10 = FR_Result2 , FR_NBig // p9 if negative overflow
+(p6) br.cond.spnt SCALBL_UNDERFLOW
+}
+;;
+
+//
+// Branch out for overflow
+//
+{ .bbb
+(p7) br.cond.spnt SCALBL_OVERFLOW
+(p9) br.cond.spnt SCALBL_OVERFLOW
+ br.ret.sptk b0 // Return from main path.
+}
+;;
+
+// Here if result overflows
+SCALBL_OVERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0 // r32-r34 for saves, r35-r38 as out regs
+ addl GR_Tag = 51, r0 // Set error tag for overflow
+ br.cond.sptk __libm_error_region // Call error support for overflow
+}
+;;
+
+// Here if result underflows
+SCALBL_UNDERFLOW:
+{ .mib
+ alloc r32=ar.pfs,3,0,4,0 // r32-r34 for saves, r35-r38 as out regs
+ addl GR_Tag = 52, r0 // Set error tag for underflow
+ br.cond.sptk __libm_error_region // Call error support for underflow
+}
+;;
+
+SCALBL_NAN_INF_ZERO:
+
+//
+// Before entry, N has been converted to a fp integer in significand of
+// FR_N_float_int
+//
+// Convert N_float_int to floating point value
+//
+{ .mfi
+ getf.sig GR_N_as_int = FR_N_float_int
+ fclass.m p6,p0 = FR_Floating_N, 0xc3 //@snan | @qnan
+ nop.i 0
+}
+{ .mfi
+ addl GR_Scratch = 1,r0 // Start building 1<<63
+ fcvt.xf FR_N_float_int = FR_N_float_int
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p0 = FR_Floating_X, 0xc3 //@snan | @qnan
+ shl GR_Scratch = GR_Scratch,63 // GR_Scratch = 1<<63
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p8,p0 = FR_Floating_N, 0x21 // @inf | @pos (n = +Inf)
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fclass.m p9,p0 = FR_Floating_N, 0x22 // @inf | @neg (n = -Inf)
+ nop.i 0
+}
+;;
+
+//
+// Either X or N is a Nan, return result and possibly raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p6) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p6) br.ret.spnt b0
+}
+;;
+
+{ .mfb
+ nop.m 0
+(p7) fma.s0 FR_Result = FR_Floating_N,FR_Floating_X,f0
+(p7) br.ret.spnt b0
+}
+;;
+
+//
+// If N = +Inf, return x*N
+// For N = -Inf, negate N so the divide below sees +Inf
+//
+{ .mfb
+ nop.m 0
+(p8) fma.s0 FR_Result = FR_Floating_X, FR_Floating_N,f0
+(p8) br.ret.spnt b0
+}
+{ .mfi
+ nop.m 0
+(p9) fnma.s0 FR_Floating_N = FR_Floating_N, f1, f0
+ nop.i 0
+}
+;;
+
+//
+// If N==-Inf,return x/(-N)
+//
+{ .mfb
+ cmp.ne p7,p0 = GR_N_as_int,GR_Scratch // Integer test only if N_as_int != 1<<63
+(p9) frcpa.s0 FR_Result,p0 = FR_Floating_X,FR_Floating_N
+(p9) br.ret.spnt b0
+}
+;;
+
+//
+// Is N an integer. (1<<63 is presumably the out-of-range fcvt.fx value)
+//
+{ .mfi
+ nop.m 0
+(p7) fcmp.neq.unc.s1 p7,p0 = FR_Norm_N, FR_N_float_int
+ nop.i 0
+}
+;;
+
+//
+// If N not an int, return NaN and raise invalid.
+//
+{ .mfb
+ nop.m 0
+(p7) frcpa.s0 FR_Result,p0 = f0,f0
+(p7) br.ret.spnt b0
+}
+;;
+
+//
+// Always return x in other path.
+//
+{ .mfb
+ nop.m 0
+ fma.s0 FR_Result = FR_Floating_X,f1,f0
+ br.ret.sptk b0
+}
+;;
+
+// Here if n not int
+// Return NaN and raise invalid.
+SCALBL_N_NOT_INT:
+{ .mfb
+ nop.m 0
+ frcpa.s0 FR_Result,p0 = f0,f0 // 0/0 under s0 produces the NaN result
+ br.ret.sptk b0
+}
+;;
+
+// Here if n=unorm: redo the entry-time reads of n from the normalized copy
+SCALBL_N_UNORM:
+{ .mfb
+ getf.exp GR_signexp_N = FR_Norm_N // Get signexp of normalized n
+ fcvt.fx.trunc.s1 FR_N_float_int = FR_Norm_N // Get N in significand
+ br.cond.sptk SCALBL_COMMON1 // Return to main path
+}
+;;
+
+// Here if x=unorm: reread the exponent from the normalized copy of x
+SCALBL_X_UNORM:
+{ .mib
+ getf.exp GR_signexp_X = FR_Norm_X // Get signexp of normalized x
+ nop.i 0
+ br.cond.sptk SCALBL_COMMON2 // Return to main path
+}
+;;
+
+GLOBAL_IEEE754_END(scalbl)
+LOCAL_LIBM_ENTRY(__libm_error_region)
+
+// Entered by branch from SCALBL_OVERFLOW/UNDERFLOW with GR_Tag set: spill N,
+// x and the tentative result to a 64-byte frame, call __libm_error_support,
+// reload the result. Get stack address of N (incoming sp - 32 = new sp + 32)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Slot for N, relative to incoming sp
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Keep ar.pfs across the call
+}
+//
+// Adjust sp: allocate the 64-byte frame announced by .fframe
+//
+{ .mfi
+.fframe 64
+ add sp=-64,sp
+ nop.f 0
+ mov GR_SAVE_GP=gp // Keep gp across the call
+};;
+
+//
+// Store N on stack in correct position
+// Locate the address of x on stack
+//
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Norm_N,16 // N at new sp+32, then step to sp+48
+ add GR_Parameter_X = 16,sp // x slot at new sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Keep return address across the call
+};;
+
+//
+// Store x on the stack.
+// Get address for result on stack.
+//
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_Norm_X
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Result slot at new sp+48
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_Result // Tentative result into result slot
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Point back at N (new sp+32)
+ br.call.sptk b0=__libm_error_support#
+};;
+
+//
+// Get location of result on stack (recomputed from sp after the call)
+//
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp
+ nop.m 0
+ nop.i 0
+};;
+
+//
+// Get the new result and release the 64-byte frame
+//
+{ .mmi
+ ldfe FR_Result = [GR_Parameter_RESULT]
+.restore sp
+ add sp = 64,sp
+ mov b0 = GR_SAVE_B0
+};;
+
+//
+// Restore gp, ar.pfs and return
+//
+{ .mib
+ mov gp = GR_SAVE_GP
+ mov ar.pfs = GR_SAVE_PFS
+ br.ret.sptk b0
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinh.S b/sysdeps/ia64/fpu/e_sinh.S
new file mode 100644
index 0000000000..344b6b671c
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sinh.S
@@ -0,0 +1,905 @@
+.file "sinh.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 10/12/00 Update to set denormal operand and underflow flags
+// 01/22/01 Fixed to set inexact flag for small args.
+// 05/02/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/20/02 Improved speed with new algorithm
+// 03/31/05 Reformatted delimiters between data tables
+
+// API
+//==============================================================
+// double sinh(double)
+
+// Overview of operation
+//==============================================================
+// Case 1: 0 < |x| < 2^-60
+// Result = x, computed by x+sgn(x)*x^2 to handle flags and rounding
+//
+// Case 2: 2^-60 < |x| < 0.25
+// Evaluate sinh(x) by a 13th order polynomial
+// Care is taken for the order of multiplication; and A1 is not exactly 1/3!,
+// A2 is not exactly 1/5!, etc.
+// sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9 + A5*x^11 + A6*x^13)
+//
+// Case 3: 0.25 < |x| < 710.47586
+// Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
+// The algorithm for exp is described as below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
+//
+// Take the input x. w is "how many log2/128 in x?"
+// w = x * 128/log2
+// n = int(w)
+// x = n log2/128 + r + delta
+
+// n = 128M + index_1 + 2^4 index_2
+// x = M log2 + (log2/128) index_1 + (log2/8) index_2 + r + delta
+
+// exp(x) = 2^M 2^(index_1/128) 2^(index_2/8) exp(r) exp(delta)
+// Construct 2^M
+// Get 2^(index_1/128) from table_1;
+// Get 2^(index_2/8) from table_2;
+// Calculate exp(r) by 5th order polynomial
+// r = x - n (log2/128)_high
+// delta = - n (log2/128)_low
+// Calculate exp(delta) as 1 + delta
+
+
+// Special values
+//==============================================================
+// sinh(+0) = +0
+// sinh(-0) = -0
+
+// sinh(+qnan) = +qnan
+// sinh(-qnan) = -qnan
+// sinh(+snan) = +qnan
+// sinh(-snan) = -qnan
+
+// sinh(-inf) = -inf
+// sinh(+inf) = +inf
+
+// Overflow and Underflow
+//=======================
+// sinh(x) = largest double normal when
+// |x| = 710.47586 = 0x408633ce8fb9f87d
+//
+// Underflow is handled as described in case 1 above
+
+// Registers used
+//==============================================================
+// Floating Point registers used:
+// f8, input, output
+// f6 -> f15, f32 -> f61
+
+// General registers used:
+// r14 -> r40
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//==============================================================
+
+rRshf = r14
+rN_neg = r14
+rAD_TB1 = r15
+rAD_TB2 = r16
+rAD_P = r17
+rN = r18
+rIndex_1 = r19
+rIndex_2_16 = r20
+rM = r21
+rBiased_M = r21
+rSig_inv_ln2 = r22
+rIndex_1_neg = r22
+rExp_bias = r23
+rExp_bias_minus_1 = r23
+rExp_mask = r24
+rTmp = r24
+rGt_ln = r24
+rIndex_2_16_neg = r24
+rM_neg = r25
+rBiased_M_neg = r25
+rRshf_2to56 = r26
+rAD_T1_neg = r26
+rExp_2tom56 = r28
+rAD_T2_neg = r28
+rAD_T1 = r29
+rAD_T2 = r30
+rSignexp_x = r31
+rExp_x = r31
+
+GR_SAVE_B0 = r33
+GR_SAVE_PFS = r34
+GR_SAVE_GP = r35
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+
+FR_X = f10
+FR_Y = f1
+FR_RESULT = f8
+
+fRSHF_2TO56 = f6
+fINV_LN2_2TO63 = f7
+fW_2TO56_RSH = f9
+f2TOM56 = f11
+fP5 = f12
+fP4 = f13
+fP3 = f14
+fP2 = f15
+
+fLn2_by_128_hi = f33
+fLn2_by_128_lo = f34
+
+fRSHF = f35
+fNfloat = f36
+fNormX = f37
+fR = f38
+fF = f39
+
+fRsq = f40
+f2M = f41
+fS1 = f42
+fT1 = f42
+fS2 = f43
+fT2 = f43
+fS = f43
+fWre_urm_f8 = f44
+fAbsX = f44
+
+fMIN_DBL_OFLOW_ARG = f45
+fMAX_DBL_NORM_ARG = f46
+fXsq = f47
+fX4 = f48
+fGt_pln = f49
+fTmp = f49
+
+fP54 = f50
+fP5432 = f50
+fP32 = f51
+fP = f52
+fP54_neg = f53
+fP5432_neg = f53
+fP32_neg = f54
+fP_neg = f55
+fF_neg = f56
+
+f2M_neg = f57
+fS1_neg = f58
+fT1_neg = f58
+fS2_neg = f59
+fT2_neg = f59
+fS_neg = f59
+fExp = f60
+fExp_neg = f61
+
+fA6 = f50
+fA65 = f50
+fA6543 = f50
+fA654321 = f50
+fA5 = f51
+fA4 = f52
+fA43 = f52
+fA3 = f53
+fA2 = f54
+fA21 = f54
+fA1 = f55
+fX3 = f56
+
+// Data tables
+//==============================================================
+
+RODATA
+.align 16
+
+// ************* DO NOT CHANGE ORDER OF THESE TABLES ********************
+
+// double-extended 1/ln(2)
+// 3fff b8aa 3b29 5c17 f0bb be87fed0691d3e88
+// 3fff b8aa 3b29 5c17 f0bc
+// For speed the significand will be loaded directly with a movl and setf.sig
+// and the exponent will be bias+63 instead of bias+0. Thus subsequent
+// computations need to scale appropriately.
+// The constant 128/ln(2) is needed for the computation of w. This is also
+// obtained by scaling the computations.
+//
+// Two shifting constants are loaded directly with movl and setf.d.
+// 1. fRSHF_2TO56 = 1.1000..00 * 2^(63-7)
+// This constant is added to x*1/ln2 to shift the integer part of
+// x*128/ln2 into the rightmost bits of the significand.
+// The result of this fma is fW_2TO56_RSH.
+// 2. fRSHF = 1.1000..00 * 2^(63)
+// This constant is subtracted from fW_2TO56_RSH * 2^(-56) to give
+// the integer part of w, n, as a floating-point number.
+// The result of this fms is fNfloat.
+
+
+LOCAL_OBJECT_START(exp_table_1)
+data8 0x408633ce8fb9f87e // smallest dbl overflow arg
+data8 0x408633ce8fb9f87d // largest dbl arg to give normal dbl result
+data8 0xb17217f7d1cf79ab , 0x00003ff7 // ln2/128 hi
+data8 0xc9e3b39803f2f6af , 0x00003fb7 // ln2/128 lo
+//
+// Table 1 is 2^(index_1/128) where
+// index_1 goes from 0 to 15
+//
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x80B1ED4FD999AB6C , 0x00003FFF
+data8 0x8164D1F3BC030773 , 0x00003FFF
+data8 0x8218AF4373FC25EC , 0x00003FFF
+data8 0x82CD8698AC2BA1D7 , 0x00003FFF
+data8 0x8383594EEFB6EE37 , 0x00003FFF
+data8 0x843A28C3ACDE4046 , 0x00003FFF
+data8 0x84F1F656379C1A29 , 0x00003FFF
+data8 0x85AAC367CC487B15 , 0x00003FFF
+data8 0x8664915B923FBA04 , 0x00003FFF
+data8 0x871F61969E8D1010 , 0x00003FFF
+data8 0x87DB357FF698D792 , 0x00003FFF
+data8 0x88980E8092DA8527 , 0x00003FFF
+data8 0x8955EE03618E5FDD , 0x00003FFF
+data8 0x8A14D575496EFD9A , 0x00003FFF
+data8 0x8AD4C6452C728924 , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_1)
+
+// Table 2 is 2^(index_2/8) where
+// index_2 goes from 0 to 7
+LOCAL_OBJECT_START(exp_table_2)
+data8 0x8000000000000000 , 0x00003FFF
+data8 0x8B95C1E3EA8BD6E7 , 0x00003FFF
+data8 0x9837F0518DB8A96F , 0x00003FFF
+data8 0xA5FED6A9B15138EA , 0x00003FFF
+data8 0xB504F333F9DE6484 , 0x00003FFF
+data8 0xC5672A115506DADD , 0x00003FFF
+data8 0xD744FCCAD69D6AF4 , 0x00003FFF
+data8 0xEAC0C6E7DD24392F , 0x00003FFF
+LOCAL_OBJECT_END(exp_table_2)
+
+
+LOCAL_OBJECT_START(exp_p_table)
+data8 0x3f8111116da21757 //P5
+data8 0x3fa55555d787761c //P4
+data8 0x3fc5555555555414 //P3
+data8 0x3fdffffffffffd6a //P2
+LOCAL_OBJECT_END(exp_p_table)
+
+LOCAL_OBJECT_START(sinh_p_table)
+data8 0xB08AF9AE78C1239F, 0x00003FDE // A6
+data8 0xB8EF1D28926D8891, 0x00003FEC // A4
+data8 0x8888888888888412, 0x00003FF8 // A2
+data8 0xD732377688025BE9, 0x00003FE5 // A5
+data8 0xD00D00D00D4D39F2, 0x00003FF2 // A3
+data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // A1
+LOCAL_OBJECT_END(sinh_p_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(sinh)
+
+{ .mlx
+ getf.exp rSignexp_x = f8 // Must recompute if x unorm
+ movl rSig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ addl rAD_TB1 = @ltoff(exp_table_1), gp // Linkage-table slot for exp_table_1
+ movl rRshf_2to56 = 0x4768000000000000 // 1.10000 2^(63+56)
+}
+;;
+
+{ .mfi
+ ld8 rAD_TB1 = [rAD_TB1] // Table address read from that slot
+ fclass.m p6,p0 = f8,0x0b // Test for x=unorm
+ mov rExp_mask = 0x1ffff // 17-bit exponent mask
+}
+{ .mfi
+ mov rExp_bias = 0xffff // Exponent bias
+ fnorm.s1 fNormX = f8 // Normalized x used by the arithmetic below
+ mov rExp_2tom56 = 0xffff-56 // Biased exponent of 2^-56
+}
+;;
+
+// Form two constants we need
+// 1/ln2 * 2^63 to compute w = x * 1/ln2 * 128
+// 1.1000..000 * 2^(63+63-7) to right shift int(w) into the significand
+
+{ .mfi
+ setf.sig fINV_LN2_2TO63 = rSig_inv_ln2 // form 1/ln2 * 2^63
+ fclass.m p8,p0 = f8,0x07 // Test for x=0
+ nop.i 999
+}
+{ .mlx
+ setf.d fRSHF_2TO56 = rRshf_2to56 // Form const 1.100 * 2^(63+56)
+ movl rRshf = 0x43e8000000000000 // 1.10000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ ldfpd fMIN_DBL_OFLOW_ARG, fMAX_DBL_NORM_ARG = [rAD_TB1],16 // Overflow bounds
+ fclass.m p10,p0 = f8,0x1e3 // Test for x=inf, nan, NaT
+ nop.i 0
+}
+{ .mfb
+ setf.exp f2TOM56 = rExp_2tom56 // form 2^-56 for scaling Nfloat
+ nop.f 0
+(p6) br.cond.spnt SINH_UNORM // Branch if x=unorm
+}
+;;
+
+SINH_COMMON:
+{ .mfi
+ ldfe fLn2_by_128_hi = [rAD_TB1],16 // ln2/128 hi, then advance 16 bytes
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ setf.d fRSHF = rRshf // Form right shift const 1.100 * 2^63
+ nop.f 0
+(p8) br.ret.spnt b0 // Exit for x=0, result=x
+}
+;;
+
+{ .mfi
+ ldfe fLn2_by_128_lo = [rAD_TB1],16 // ln2/128 lo, then advance 16 bytes
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+(p10) fma.d.s0 f8 = f8,f1,f0 // Result if x=inf, nan, NaT
+(p10) br.ret.spnt b0 // quick exit for x=inf, nan, NaT
+}
+;;
+
+// After that last load rAD_TB1 points to the beginning of table 1
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p6,p0 = f8, f0 // Dummy compare in s0 to set D flag
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
+}
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt SINH_SMALL // Branch if 0 < |x| < 2^-2
+}
+;;
+
+// W = X * Inv_log2_by_128
+// By adding 1.10...0*2^63 we shift and get round_int(W) in significand.
+// We actually add 1.10...0*2^56 to X * Inv_log2 to do the same thing.
+
+{ .mfi
+ add rAD_P = 0x180, rAD_TB1 // exp_p_table, if tables are back to back
+ fma.s1 fW_2TO56_RSH = fNormX, fINV_LN2_2TO63, fRSHF_2TO56
+ add rAD_TB2 = 0x100, rAD_TB1 // exp_table_2, if tables are back to back
+}
+;;
+
+// Divide arguments into the following categories:
+// Certain Safe - 0.25 <= |x| <= MAX_DBL_NORM_ARG
+// Possible Overflow p14 - MAX_DBL_NORM_ARG < |x| < MIN_DBL_OFLOW_ARG
+// Certain Overflow p15 - MIN_DBL_OFLOW_ARG <= |x| < +inf
+//
+// If the input is really a double arg, then there will never be
+// "Possible Overflow" arguments.
+//
+
+{ .mfi
+ ldfpd fP5, fP4 = [rAD_P] ,16 // P5, P4; pointer moves on to P3, P2
+ fcmp.ge.s1 p15,p14 = fAbsX,fMIN_DBL_OFLOW_ARG
+ nop.i 0
+}
+;;
+
+// Nfloat = round_int(W)
+// The significand of fW_2TO56_RSH contains the rounded integer part of W,
+// as a twos complement number in the lower bits (that is, it may be negative).
+// That twos complement number (called N) is put into rN.
+
+// Since fW_2TO56_RSH is scaled by 2^56, it must be multiplied by 2^-56
+// before the shift constant 1.10000 * 2^63 is subtracted to yield fNfloat.
+// Thus, fNfloat contains the floating point version of N
+
+{ .mfi
+ ldfpd fP3, fP2 = [rAD_P]
+(p14) fcmp.gt.unc.s1 p14,p0 = fAbsX,fMAX_DBL_NORM_ARG // p14 only if MAX < |x| < MIN
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fms.s1 fNfloat = fW_2TO56_RSH, f2TOM56, fRSHF // W*2^-56 - 1.1*2^63
+(p15) br.cond.spnt SINH_CERTAIN_OVERFLOW
+}
+;;
+
+{ .mfi
+ getf.sig rN = fW_2TO56_RSH // N from the low significand bits
+ nop.f 0
+ mov rExp_bias_minus_1 = 0xfffe // bias-1, so the 2^M scale includes 1/2
+}
+;;
+
+// rIndex_1 has index_1
+// rIndex_2_16 has index_2 * 16
+// rBiased_M has M + bias - 1, so f2M = 2^(M-1) carries the final divide by 2
+
+// rM has true M
+// r = x - Nfloat * ln2_by_128_hi
+// f = 1 - Nfloat * ln2_by_128_lo
+{ .mfi
+ and rIndex_1 = 0x0f, rN // Low 4 bits of N
+ fnma.s1 fR = fNfloat, fLn2_by_128_hi, fNormX
+ shr rM = rN, 0x7 // M = N >> 7
+}
+{ .mfi
+ and rIndex_2_16 = 0x70, rN // Bits 4..6 of N, already scaled by 16
+ fnma.s1 fF = fNfloat, fLn2_by_128_lo, f1
+ sub rN_neg = r0, rN // -N for the exp(-x) side
+}
+;;
+
+{ .mmi
+ and rIndex_1_neg = 0x0f, rN_neg
+ add rBiased_M = rExp_bias_minus_1, rM
+ shr rM_neg = rN_neg, 0x7
+}
+{ .mmi
+ and rIndex_2_16_neg = 0x70, rN_neg
+ add rAD_T2 = rAD_TB2, rIndex_2_16 // Table 2 entry (16 bytes each)
+ shladd rAD_T1 = rIndex_1, 4, rAD_TB1 // Table 1 entry: base + index_1*16
+}
+;;
+
+// rAD_T1 has address of T1
+// rAD_T2 has address of T2
+
+{ .mmi
+ setf.exp f2M = rBiased_M
+ ldfe fT2 = [rAD_T2]
+ nop.i 0
+}
+{ .mmi
+ add rBiased_M_neg = rExp_bias_minus_1, rM_neg
+ add rAD_T2_neg = rAD_TB2, rIndex_2_16_neg
+ shladd rAD_T1_neg = rIndex_1_neg, 4, rAD_TB1
+}
+;;
+
+// Create Scale = 2^M
+// Load T1 and T2
+{ .mmi
+ ldfe fT1 = [rAD_T1]
+ nop.m 0
+ nop.i 0
+}
+{ .mmf
+ setf.exp f2M_neg = rBiased_M_neg
+ ldfe fT2_neg = [rAD_T2_neg]
+ fma.s1 fF_neg = fNfloat, fLn2_by_128_lo, f1 // f for -x: 1 + Nfloat*ln2_lo
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fRsq = fR, fR, f0 // r^2
+ nop.i 0
+}
+{ .mfi
+ ldfe fT1_neg = [rAD_T1_neg]
+ fma.s1 fP54 = fR, fP5, fP4 // P5*r + P4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP32 = fR, fP3, fP2 // P3*r + P2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fP54_neg = fR, fP5, fP4 // -P5*r + P4 (polynomial at -r)
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fnma.s1 fP32_neg = fR, fP3, fP2 // -P3*r + P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP5432 = fRsq, fP54, fP32 // r^2*P54 + P32
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS2 = fF,fT2,f0 // S2 = f * T2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fS1 = f2M,fT1,f0 // S1 = 2^(M-1) * T1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fP5432_neg = fRsq, fP54_neg, fP32_neg
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fS1_neg = f2M_neg,fT1_neg,f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS2_neg = fF_neg,fT2_neg,f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP = fRsq, fP5432, fR // P = r + r^2*P5432
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS = fS1,fS2,f0 // S = S1 * S2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fms.s1 fP_neg = fRsq, fP5432_neg, fR // P_neg = -r + r^2*P5432_neg
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fS_neg = fS1_neg,fS2_neg,f0
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fmpy.s0 fTmp = fLn2_by_128_lo, fLn2_by_128_lo // Force inexact
+(p14) br.cond.spnt SINH_POSSIBLE_OVERFLOW
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fExp = fS, fP, fS // S*(1+P): the exp(x) term, already halved
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fExp_neg = fS_neg, fP_neg, fS_neg // Same for the exp(-x) term
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fms.d.s0 f8 = fExp, f1, fExp_neg // Result = fExp - fExp_neg, rounded to double
+ br.ret.sptk b0 // Normal path exit
+}
+;;
+
+// Here if 0 < |x| < 0.25
+SINH_SMALL:
+{ .mfi
+ add rAD_T1 = 0x1a0, rAD_TB1 // Walks A6, A4, A2 if tables are back to back
+ fcmp.lt.s1 p7, p8 = fNormX, f0 // Test sign of x
+ cmp.gt p6, p0 = -60, rExp_x // Test |x| < 2^(-60)
+}
+{ .mfi
+ add rAD_T2 = 0x1d0, rAD_TB1 // Walks A5, A3, A1 (48 bytes further on)
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mmb
+ ldfe fA6 = [rAD_T1],16
+ ldfe fA5 = [rAD_T2],16
+(p6) br.cond.spnt SINH_VERY_SMALL // Branch if |x| < 2^(-60)
+}
+;;
+
+{ .mmi
+ ldfe fA4 = [rAD_T1],16
+ ldfe fA3 = [rAD_T2],16
+ nop.i 0
+}
+;;
+
+{ .mmi
+ ldfe fA2 = [rAD_T1]
+ ldfe fA1 = [rAD_T2]
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fX3 = fNormX, fXsq, f0 // x^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0 // x^4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA65 = fXsq, fA6, fA5 // A6*x^2 + A5
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3 // A4*x^2 + A3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1 // A2*x^2 + A1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA6543 = fX4, fA65, fA43 // A65*x^4 + A43
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA654321 = fX4, fA6543, fA21 // A6543*x^4 + A21
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 fTmp = fA6, fA6
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fA654321, fX3, fNormX // Result = x + x^3 * polynomial
+ br.ret.sptk b0 // Exit if 2^-60 < |x| < 0.25
+}
+;;
+
+SINH_VERY_SMALL:
+// Here if 0 < |x| < 2^-60 (p7/p8 hold the sign test made at SINH_SMALL)
+// Compute result by x + sgn(x)*x^2 to get properly rounded result
+.pred.rel "mutex",p7,p8
+{ .mfi
+ nop.m 0
+(p7) fnma.d.s0 f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p8) fma.d.s0 f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
+ br.ret.sptk b0 // Exit if |x| < 2^-60
+}
+;;
+
+
+SINH_POSSIBLE_OVERFLOW:
+
+// Here if fMAX_DBL_NORM_ARG < |x| < fMIN_DBL_OFLOW_ARG
+// This cannot happen if input is a double, only if input higher precision.
+// Overflow is a possibility, not a certainty.
+
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest double, then we have
+// overflow
+
+{ .mfi
+ mov rGt_ln = 0x103ff // Exponent for largest dbl + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp fGt_pln = rGt_ln // Create largest double + 1 ulp
+ fma.d.s2 fWre_urm_f8 = fS, fP, fS // Result with wre set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt SINH_CERTAIN_OVERFLOW // Branch if overflow
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.d.s0 f8 = fS, fP, fS // NOTE(review): exp(x) term only, fS_neg/fP_neg unused - confirm for x<0
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+SINH_CERTAIN_OVERFLOW:
+{ .mfi
+ sub rTmp = rExp_mask, r0, 1 // r24 - 1; r24 is 0x1ffff (mask) or 0x103ff (rGt_ln) here
+ fcmp.lt.s1 p6, p7 = fNormX, f0 // Test for x < 0
+ nop.i 0
+}
+;;
+
+{ .mmf
+ alloc r32=ar.pfs,1,4,4,0 // r32-r36 for locals/saves, r37-r40 as out regs
+ setf.exp fTmp = rTmp // Huge value whose square overflows double
+ fmerge.s FR_X = f8,f8 // Copy x; f8 is overwritten by the result below
+}
+;;
+
+{ .mfi
+ mov GR_Parameter_TAG = 127 // Error tag, passed in the last out register
+(p6) fnma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and -INF result
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p7) fma.d.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
+}
+;;
+
+// Here if x unorm: reread the exponent from the normalized copy, then rejoin
+SINH_UNORM:
+{ .mfb
+ getf.exp rSignexp_x = fNormX // Must recompute if x unorm
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag
+ br.cond.sptk SINH_COMMON // Rejoin main path
+}
+;;
+
+GLOBAL_IEEE754_END(sinh)
+libm_alias_double_other (__sinh, sinh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (incoming sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfd [GR_Parameter_Y] = FR_Y,16 // STORE Parameter 2 (FR_Y = f1) at new sp+32
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfd [GR_Parameter_X] = FR_X // STORE Parameter 1 on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = FR_RESULT // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Parameter 3 address, rebuilt after call
+ nop.m 0
+ nop.i 0
+};;
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhf.S b/sysdeps/ia64/fpu/e_sinhf.S
new file mode 100644
index 0000000000..6a14928e04
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sinhf.S
@@ -0,0 +1,748 @@
+.file "sinhf.s"
+
+
+// Copyright (c) 2000 - 2005, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+
+// History
+//*********************************************************************
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 10/12/00 Update to set denormal operand and underflow flags
+// 01/22/01 Fixed to set inexact flag for small args.
+// 05/02/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 11/20/02 Improved algorithm based on expf
+// 03/31/05 Reformatted delimiters between data tables
+//
+// API
+//*********************************************************************
+// float sinhf(float)
+//
+// Overview of operation
+//*********************************************************************
+// Case 1: 0 < |x| < 2^-60
+// Result = x, computed by x + sgn(x)*x^2 to handle flags and rounding
+//
+// Case 2: 2^-60 < |x| < 0.25
+// Evaluate sinh(x) by a 9th order polynomial
+// Care is taken with the order of multiplication; A2 is not exactly 1/5!,
+// A3 is not exactly 1/7!, etc.
+// sinh(x) = x + (A1*x^3 + A2*x^5 + A3*x^7 + A4*x^9)
+//
+// Case 3: 0.25 < |x| < 89.41598
+// Algorithm is based on the identity sinh(x) = ( exp(x) - exp(-x) ) / 2.
+// The algorithm for exp is described as below. There are a number of
+// economies from evaluating both exp(x) and exp(-x). Although we
+// are evaluating both quantities, only where the quantities diverge do we
+// duplicate the computations. The basic algorithm for exp(x) is described
+// below.
+//
+// Take the input x. w is "how many log2/64 in x?"
+// w = x * 64/log2
+// NJ = int(w)
+// x = NJ*log2/64 + R
+
+// NJ = 64*n + j
+// x = n*log2 + (log2/64)*j + R
+//
+// So, exp(x) = 2^n * 2^(j/64)* exp(R)
+//
+// T = 2^n * 2^(j/64)
+// Construct 2^n
+// Get 2^(j/64) table
+// actually all the entries of 2^(j/64) table are stored in DP and
+// with exponent bits set to 0 -> multiplication on 2^n can be
+// performed by doing logical "or" operation with bits presenting 2^n
+
+// exp(R) = 1 + (exp(R) - 1)
+// P = exp(R) - 1 approximated by Taylor series of 3rd degree
+// P = A3*R^3 + A2*R^2 + R, A3 = 1/6, A2 = 1/2
+//
+
+// The final result is reconstructed as follows
+// exp(x) = T + T*P
+
+// Special values
+//*********************************************************************
+// sinhf(+0) = +0
+// sinhf(-0) = -0
+
+// sinhf(+qnan) = +qnan
+// sinhf(-qnan) = -qnan
+// sinhf(+snan) = +qnan
+// sinhf(-snan) = -qnan
+
+// sinhf(-inf) = -inf
+// sinhf(+inf) = +inf
+
+// Overflow and Underflow
+//*********************************************************************
+// sinhf(x) = largest single normal when
+// x = 89.41598 = 0x42b2d4fc
+//
+// Underflow is handled as described in case 1 above
+
+// Registers used
+//*********************************************************************
+// Floating Point registers used:
+// f8 input, output
+// f6,f7, f9 -> f15, f32 -> f45
+
+// General registers used:
+// r2, r3, r16 -> r38
+
+// Predicate registers used:
+// p6 -> p15
+
+// Assembly macros
+//*********************************************************************
+// integer registers used
+// scratch
+rNJ = r2 // integer bits of n,j taken from fNint
+rNJ_neg = r3 // same for -x (0 - rNJ)
+
+rJ_neg = r16 // j for -x, then its table address, then the table value
+rN_neg = r17 // biased, shifted n-1 for -x
+rSignexp_x = r18 // sign and exponent of x from getf.exp
+rExp_x = r18 // shares r18 with rSignexp_x: biased, then true exponent
+rExp_mask = r19 // 0x1ffff
+rExp_bias = r20 // 0xffff
+rAd1 = r21 // address of the A4,A3 pair in sinh_p_table
+rAd2 = r22 // address of the A2,A1 pair in sinh_p_table
+rJ = r23 // j, then its table address, then the table value
+rN = r24 // biased, shifted n-1
+rTblAddr = r25 // walks _sinhf_table: base, base+8, base+16
+rA3 = r26 // SP bit pattern approximating 1/6
+rExpHalf = r27 // exponent 0xFFFE (1/2)
+rLn2Div64 = r28 // DP bit pattern of ln(2)/64
+rGt_ln = r29 // possible-overflow path only
+r17ones_m1 = r29 // shares r29 with rGt_ln; certain-overflow path only
+rRightShifter = r30 // DP bit pattern of the right shifter
+rJ_mask = r30 // shares r30 with rRightShifter; 0x3f once the shifter is in FP
+r64DivLn2 = r31 // DP bit pattern of 64/ln(2)
+rN_mask = r31 // shares r31 with r64DivLn2; ~0x3f
+// stacked (alloc in SINH_CERTAIN_OVERFLOW: 3 locals r32-r34, 4 outputs r35-r38)
+GR_SAVE_PFS = r32
+GR_SAVE_B0 = r33
+GR_SAVE_GP = r34
+GR_Parameter_X = r35
+GR_Parameter_Y = r36
+GR_Parameter_RESULT = r37
+GR_Parameter_TAG = r38
+
+// floating point registers used
+FR_X = f10 // shares f10 with fNint; written only on the overflow path
+FR_Y = f1 // f1 itself is stored as "parameter 2" by __libm_error_region
+FR_RESULT = f8
+// scratch
+fRightShifter = f6
+f64DivLn2 = f7
+fNormX = f9
+fNint = f10
+fN = f11
+fR = f12
+fLn2Div64 = f13
+fA2 = f14
+fA3 = f15
+// stacked
+fP = f32
+fT = f33
+fMIN_SGL_OFLOW_ARG = f34
+fMAX_SGL_NORM_ARG = f35
+fRSqr = f36
+fA1 = f37
+fA21 = f37 // shares f37 with fA1
+fA4 = f38
+fA43 = f38 // shares f38 with fA4
+fA4321 = f38 // shares f38 with fA4 and fA43
+fX4 = f39
+fTmp = f39 // shares f39 with fX4
+fGt_pln = f39 // shares f39 with fX4 and fTmp
+fWre_urm_f8 = f40
+fXsq = f40 // shares f40 with fWre_urm_f8
+fP_neg = f41
+fX3 = f41 // shares f41 with fP_neg
+fT_neg = f42
+fExp = f43
+fExp_neg = f44
+fAbsX = f45
+
+
+RODATA
+.align 16
+
+LOCAL_OBJECT_START(_sinhf_table)
+data4 0x42b2d4fd // Smallest single arg to overflow single result
+data4 0x42b2d4fc // Largest single arg to give normal single result
+data4 0x00000000 // pad
+data4 0x00000000 // pad
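+// 2^(j/64) table, j goes from 0 to 63.  Entries are stored in DP with the
+// exponent bits set to 0, so the bits presenting 2^(n-1) can be OR-ed in.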
+data8 0x0000000000000000 // 2^(0/64)
+data8 0x00002C9A3E778061 // 2^(1/64)
+data8 0x000059B0D3158574 // 2^(2/64)
+data8 0x0000874518759BC8 // 2^(3/64)
+data8 0x0000B5586CF9890F // 2^(4/64)
+data8 0x0000E3EC32D3D1A2 // 2^(5/64)
+data8 0x00011301D0125B51 // 2^(6/64)
+data8 0x0001429AAEA92DE0 // 2^(7/64)
+data8 0x000172B83C7D517B // 2^(8/64)
+data8 0x0001A35BEB6FCB75 // 2^(9/64)
+data8 0x0001D4873168B9AA // 2^(10/64)
+data8 0x0002063B88628CD6 // 2^(11/64)
+data8 0x0002387A6E756238 // 2^(12/64)
+data8 0x00026B4565E27CDD // 2^(13/64)
+data8 0x00029E9DF51FDEE1 // 2^(14/64)
+data8 0x0002D285A6E4030B // 2^(15/64)
+data8 0x000306FE0A31B715 // 2^(16/64)
+data8 0x00033C08B26416FF // 2^(17/64)
+data8 0x000371A7373AA9CB // 2^(18/64)
+data8 0x0003A7DB34E59FF7 // 2^(19/64)
+data8 0x0003DEA64C123422 // 2^(20/64)
+data8 0x0004160A21F72E2A // 2^(21/64)
+data8 0x00044E086061892D // 2^(22/64)
+data8 0x000486A2B5C13CD0 // 2^(23/64)
+data8 0x0004BFDAD5362A27 // 2^(24/64)
+data8 0x0004F9B2769D2CA7 // 2^(25/64)
+data8 0x0005342B569D4F82 // 2^(26/64)
+data8 0x00056F4736B527DA // 2^(27/64)
+data8 0x0005AB07DD485429 // 2^(28/64)
+data8 0x0005E76F15AD2148 // 2^(29/64)
+data8 0x0006247EB03A5585 // 2^(30/64)
+data8 0x0006623882552225 // 2^(31/64)
+data8 0x0006A09E667F3BCD // 2^(32/64)
+data8 0x0006DFB23C651A2F // 2^(33/64)
+data8 0x00071F75E8EC5F74 // 2^(34/64)
+data8 0x00075FEB564267C9 // 2^(35/64)
+data8 0x0007A11473EB0187 // 2^(36/64)
+data8 0x0007E2F336CF4E62 // 2^(37/64)
+data8 0x00082589994CCE13 // 2^(38/64)
+data8 0x000868D99B4492ED // 2^(39/64)
+data8 0x0008ACE5422AA0DB // 2^(40/64)
+data8 0x0008F1AE99157736 // 2^(41/64)
+data8 0x00093737B0CDC5E5 // 2^(42/64)
+data8 0x00097D829FDE4E50 // 2^(43/64)
+data8 0x0009C49182A3F090 // 2^(44/64)
+data8 0x000A0C667B5DE565 // 2^(45/64)
+data8 0x000A5503B23E255D // 2^(46/64)
+data8 0x000A9E6B5579FDBF // 2^(47/64)
+data8 0x000AE89F995AD3AD // 2^(48/64)
+data8 0x000B33A2B84F15FB // 2^(49/64)
+data8 0x000B7F76F2FB5E47 // 2^(50/64)
+data8 0x000BCC1E904BC1D2 // 2^(51/64)
+data8 0x000C199BDD85529C // 2^(52/64)
+data8 0x000C67F12E57D14B // 2^(53/64)
+data8 0x000CB720DCEF9069 // 2^(54/64)
+data8 0x000D072D4A07897C // 2^(55/64)
+data8 0x000D5818DCFBA487 // 2^(56/64)
+data8 0x000DA9E603DB3285 // 2^(57/64)
+data8 0x000DFC97337B9B5F // 2^(58/64)
+data8 0x000E502EE78B3FF6 // 2^(59/64)
+data8 0x000EA4AFA2A490DA // 2^(60/64)
+data8 0x000EFA1BEE615A27 // 2^(61/64)
+data8 0x000F50765B6E4540 // 2^(62/64)
+data8 0x000FA7C1819E90D8 // 2^(63/64)
+LOCAL_OBJECT_END(_sinhf_table)
+// Small-|x| polynomial coefficients; SINH_SMALL addresses them as rTblAddr+0x200/0x210.
+LOCAL_OBJECT_START(sinh_p_table)
+data8 0x3ec749d84bc96d7d // A4: x^9 coefficient, close to 1/9!
+data8 0x3f2a0168d09557cf // A3: x^7 coefficient, close to 1/7!
+data8 0x3f811111326ed15a // A2: x^5 coefficient, close to 1/5!
+data8 0x3fc55555552ed1e2 // A1: x^3 coefficient, close to 1/3!
+LOCAL_OBJECT_END(sinh_p_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(sinhf)
+// float sinhf(float): x arrives in f8 and the result is returned in f8.
+{ .mlx
+ getf.exp rSignexp_x = f8 // Sign+exponent of x; SINH_UNORM redoes this if x unorm
+ movl r64DivLn2 = 0x40571547652B82FE // 64/ln(2)
+}
+{ .mlx
+ addl rTblAddr = @ltoff(_sinhf_table),gp // linkage-table slot for _sinhf_table
+ movl rRightShifter = 0x43E8000000000000 // DP Right Shifter
+}
+;;
+
+{ .mfi
+ // point to the beginning of the table
+ ld8 rTblAddr = [rTblAddr]
+ fclass.m p6, p0 = f8, 0x0b // Test for x=unorm
+ addl rA3 = 0x3E2AA, r0 // high bits of 1.0/6.0 rounded to SP
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 fNormX = f8 // normalized x
+ addl rExpHalf = 0xFFFE, r0 // exponent of 1/2
+}
+;;
+
+{ .mfi
+ setf.d f64DivLn2 = r64DivLn2 // load 64/ln(2) to FP reg
+ fclass.m p15, p0 = f8, 0x1e3 // test for NaT,NaN,Inf
+ nop.i 0
+}
+{ .mlx
+ // load Right Shifter to FP reg
+ setf.d fRightShifter = rRightShifter
+ movl rLn2Div64 = 0x3F862E42FEFA39EF // DP ln(2)/64 in GR
+}
+;;
+
+{ .mfi
+ mov rExp_mask = 0x1ffff // mask applied to rSignexp_x to get the biased exponent
+ fcmp.eq.s1 p13, p0 = f0, f8 // test for x = 0.0
+ shl rA3 = rA3, 12 // 0x3E2AA000, approx to 1.0/6.0 in SP
+}
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt SINH_UNORM // Branch if x=unorm
+}
+;;
+// Common path -- SINH_UNORM branches back here after redoing rSignexp_x
+SINH_COMMON:
+{ .mfi
+ setf.exp fA2 = rExpHalf // A2 = 1/2, built from exponent 0xFFFE
+ nop.f 0
+ mov rExp_bias = 0xffff // bias subtracted below to get the true exponent
+}
+{ .mfb
+ setf.d fLn2Div64 = rLn2Div64 // load ln(2)/64 to FP reg
+(p15) fma.s.s0 f8 = f8, f1, f0 // result if x = NaT,NaN,Inf
+(p15) br.ret.spnt b0 // exit here if x = NaT,NaN,Inf
+}
+;;
+
+{ .mfi
+ // min overflow and max normal threshold; post-increment leaves rTblAddr at base+8
+ ldfps fMIN_SGL_OFLOW_ARG, fMAX_SGL_NORM_ARG = [rTblAddr], 8
+ nop.f 0
+ and rExp_x = rExp_mask, rSignexp_x // Biased exponent of x
+}
+{ .mfb
+ setf.s fA3 = rA3 // load A3 to FP reg
+ nop.f 0
+(p13) br.ret.spnt b0 // exit here if x=0.0, return x
+}
+;;
+
+{ .mfi
+ sub rExp_x = rExp_x, rExp_bias // True exponent of x
+ fmerge.s fAbsX = f0, fNormX // Form |x|
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // x*(64/ln(2)) + Right Shifter
+ fma.s1 fNint = fNormX, f64DivLn2, fRightShifter
+ add rTblAddr = 8, rTblAddr // rTblAddr = base+16, start of the 2^(j/64) table
+}
+{ .mfb
+ cmp.gt p7, p0 = -2, rExp_x // Test |x| < 2^(-2)
+ fma.s1 fXsq = fNormX, fNormX, f0 // x*x for small path
+(p7) br.cond.spnt SINH_SMALL // Branch if 0 < |x| < 2^-2
+}
+;;
+
+{ .mfi
+ nop.m 0
+ // check for overflow
+ fcmp.ge.s1 p12, p13 = fAbsX, fMIN_SGL_OFLOW_ARG
+ mov rJ_mask = 0x3f // 6-bit mask for J
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fms.s1 fN = fNint, f1, fRightShifter // n in FP register
+ // branch out if overflow
+(p12) br.cond.spnt SINH_CERTAIN_OVERFLOW
+}
+;;
+
+{ .mfi
+ getf.sig rNJ = fNint // bits of n, j
+ // check for possible overflow
+ fcmp.gt.s1 p13, p0 = fAbsX, fMAX_SGL_NORM_ARG
+ nop.i 0
+}
+;;
+
+{ .mfi
+ addl rN = 0xFFBF - 63, rNJ // biased and shifted n-1,j
+ fnma.s1 fR = fLn2Div64, fN, fNormX // R = x - N*ln(2)/64
+ and rJ = rJ_mask, rNJ // bits of j
+}
+{ .mfi
+ sub rNJ_neg = r0, rNJ // bits of n, j for -x
+ nop.f 0
+ andcm rN_mask = -1, rJ_mask // 0xff...fc0 to mask N
+}
+;;
+
+{ .mfi
+ shladd rJ = rJ, 3, rTblAddr // address in the 2^(j/64) table
+ nop.f 0
+ and rN = rN_mask, rN // biased, shifted n-1
+}
+{ .mfi
+ addl rN_neg = 0xFFBF - 63, rNJ_neg // -x biased, shifted n-1,j
+ nop.f 0
+ and rJ_neg = rJ_mask, rNJ_neg // bits of j for -x
+}
+;;
+
+{ .mfi
+ ld8 rJ = [rJ] // Table value
+ nop.f 0
+ shl rN = rN, 46 // 2^(n-1) bits in DP format
+}
+{ .mfi
+ shladd rJ_neg = rJ_neg, 3, rTblAddr // addr in 2^(j/64) table -x
+ nop.f 0
+ and rN_neg = rN_mask, rN_neg // biased, shifted n-1 for -x
+}
+;;
+
+{ .mfi
+ ld8 rJ_neg = [rJ_neg] // Table value for -x
+ nop.f 0
+ shl rN_neg = rN_neg, 46 // 2^(n-1) bits in DP format for -x
+}
+;;
+
+{ .mfi
+ or rN = rN, rJ // bits of 2^(n-1) * 2^(j/64) in DP format
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mmf
+ setf.d fT = rN // 2^(n-1) * 2^(j/64)
+ or rN_neg = rN_neg, rJ_neg // -x bits of 2^(n-1) * 2^(j/64) in DP
+ fma.s1 fRSqr = fR, fR, f0 // R^2
+}
+;;
+
+{ .mfi
+ setf.d fT_neg = rN_neg // 2^(n-1) * 2^(j/64) for -x
+ fma.s1 fP = fA3, fR, fA2 // A3*R + A2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fnma.s1 fP_neg = fA3, fR, fA2 // A2 - A3*R, i.e. A3*(-R) + A2 for -x
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fP = fP, fRSqr, fR // P = (A3*R + A2)*R^2 + R
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fms.s1 fP_neg = fP_neg, fRSqr, fR // P for -x: (A2 - A3*R)*R^2 - R
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fmpy.s0 fTmp = fLn2Div64, fLn2Div64 // Force inexact
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fExp = fP, fT, fT // exp(x)/2
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s1 fExp_neg = fP_neg, fT_neg, fT_neg // exp(-x)/2
+ // branch out if possible overflow result
+(p13) br.cond.spnt SINH_POSSIBLE_OVERFLOW
+}
+;;
+
+{ .mfb
+ nop.m 0
+ // final result in the absence of overflow
+ fms.s.s0 f8 = fExp, f1, fExp_neg // result = (exp(x)-exp(-x))/2
+ // exit here in the absence of overflow
+ br.ret.sptk b0 // Exit main path, 0.25 <= |x| < 89.41598
+}
+;;
+
+// Here if 0 < |x| < 0.25. Evaluate 9th order polynomial.
+SINH_SMALL:
+{ .mfi
+ add rAd1 = 0x200, rTblAddr // A4,A3 pair: 0x200 past the start of the 2^(j/64) table
+ fcmp.lt.s1 p7, p8 = fNormX, f0 // Test sign of x
+ cmp.gt p6, p0 = -60, rExp_x // Test |x| < 2^(-60)
+}
+{ .mfi
+ add rAd2 = 0x210, rTblAddr // A2,A1 pair
+ nop.f 0
+ nop.i 0
+}
+;;
+
+{ .mmb
+ ldfpd fA4, fA3 = [rAd1]
+ ldfpd fA2, fA1 = [rAd2]
+(p6) br.cond.spnt SINH_VERY_SMALL // Branch if |x| < 2^(-60)
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fX3 = fXsq, fNormX, f0 // x^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fX4 = fXsq, fXsq, f0 // x^4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA43 = fXsq, fA4, fA3 // A4*x^2 + A3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 fA21 = fXsq, fA2, fA1 // A2*x^2 + A1
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 fA4321 = fX4, fA43, fA21 // (A4*x^2 + A3)*x^4 + (A2*x^2 + A1)
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 fTmp = fA4, fA4 // fA4 names the same register as fA4321 here
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fA4321, fX3, fNormX // result = x + x^3 * polynomial
+ br.ret.sptk b0 // Exit if 2^-60 < |x| < 0.25
+}
+;;
+
+SINH_VERY_SMALL:
+// Here if 0 < |x| < 2^-60; p7 (x<0) and p8 were set at the top of SINH_SMALL
+// Compute result by x + sgn(x)*x^2 to get properly rounded result
+.pred.rel "mutex",p7,p8
+{ .mfi
+ nop.m 0
+(p7) fnma.s.s0 f8 = fNormX, fNormX, fNormX // If x<0 result ~ x-x^2
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p8) fma.s.s0 f8 = fNormX, fNormX, fNormX // If x>0 result ~ x+x^2
+ br.ret.sptk b0 // Exit if |x| < 2^-60
+}
+;;
+
+SINH_POSSIBLE_OVERFLOW:
+
+// Here if fMAX_SGL_NORM_ARG < |x| < fMIN_SGL_OFLOW_ARG (both tests use fAbsX)
+// This cannot happen if input is a single, only if input higher precision.
+// Overflow is a possibility, not a certainty.
+// NOTE(review): only the +x terms fP/fT are used below -- confirm x < 0 handling.
+// Recompute result using status field 2 with user's rounding mode,
+// and wre set. If result is larger than largest single, then we have
+// overflow
+
+{ .mfi
+ mov rGt_ln = 0x1007f // Exponent for largest single + 1 ulp
+ fsetc.s2 0x7F,0x42 // Get user's round mode, set wre
+ nop.i 0
+}
+;;
+
+{ .mfi
+ setf.exp fGt_pln = rGt_ln // Create largest single + 1 ulp
+ fma.s.s2 fWre_urm_f8 = fP, fT, fT // Result with wre set
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fsetc.s2 0x7F,0x40 // Turn off wre in sf2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.ge.s1 p6, p0 = fWre_urm_f8, fGt_pln // Test for overflow
+ nop.i 0
+}
+;;
+
+{ .mfb
+ nop.m 0
+ nop.f 0
+(p6) br.cond.spnt SINH_CERTAIN_OVERFLOW // Branch if overflow
+}
+;;
+
+{ .mfb
+ nop.m 0
+ fma.s.s0 f8 = fP, fT, fT // T + T*P only; fExp_neg is not subtracted here
+ br.ret.sptk b0 // Exit if really no overflow
+}
+;;
+
+// here if overflow: build a signed overflowing result, then go to __libm_error_region
+SINH_CERTAIN_OVERFLOW:
+{ .mfi
+ addl r17ones_m1 = 0x1FFFE, r0 // exponent field value: 17 ones minus 1
+ fcmp.lt.s1 p6, p7 = fNormX, f0 // Test for x < 0
+ nop.i 0
+}
+;;
+
+{ .mmf
+ alloc r32 = ar.pfs, 0, 3, 4, 0 // get some registers
+ setf.exp fTmp = r17ones_m1 // huge value whose square overflows
+ fmerge.s FR_X = f8,f8 // keep x: f8 (FR_RESULT) is overwritten below
+}
+;;
+
+{ .mfi
+ mov GR_Parameter_TAG = 128 // tag handed to __libm_error_support
+(p6) fnma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and -INF result
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p7) fma.s.s0 FR_RESULT = fTmp, fTmp, f0 // Set I,O and +INF result
+ br.cond.sptk __libm_error_region
+}
+;;
+
+// Here if x unorm: redo the exponent from the normalized copy and raise D
+SINH_UNORM:
+{ .mfb
+ getf.exp rSignexp_x = fNormX // Sign+exponent of normalized x replaces the entry value
+ fcmp.eq.s0 p6, p0 = f8, f0 // Set D flag; SINH_COMMON does not read p6
+ br.cond.sptk SINH_COMMON // Return to main path
+}
+;;
+
+GLOBAL_IEEE754_END(sinhf)
+libm_alias_float_other (__sinh, sinh)
+// __libm_error_region: entered from SINH_CERTAIN_OVERFLOW. Spills FR_Y, FR_X and
+// FR_RESULT into a 64-byte frame and calls __libm_error_support.
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address: entry sp - 32 (= new sp + 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 at sp+32, pointer moves to sp+48
+ add GR_Parameter_X = 16,sp // Parameter 1 address: sp+16
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mfi
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 at sp+16
+ nop.f 0
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address: sp+48
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 (result) at sp+48
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address: sp+32
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute result slot address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Reload the result slot at sp+48 into f8
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sinhl.S b/sysdeps/ia64/fpu/e_sinhl.S
new file mode 100644
index 0000000000..8ec2dca9e6
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sinhl.S
@@ -0,0 +1,1117 @@
+.file "sinhl.s"
+
+
+// Copyright (c) 2000 - 2002, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+// History
+//==============================================================
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 10/12/00 Update to set denormal operand and underflow flags
+// 01/22/01 Fixed to set inexact flag for small args. Fixed incorrect
+// call to __libm_error_support for 710.476 < x < 11357.2166.
+// 05/02/01 Reworked to improve speed of all paths
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 12/04/02 Improved performance
+//
+// API
+//==============================================================
+// long double = sinhl(long double)
+// input floating point f8
+// output floating point f8
+//
+// Registers used
+//==============================================================
+// general registers:
+// r14 -> r40
+// predicate registers used:
+// p6 -> p11
+// floating-point registers used:
+// f9 -> f15; f32 -> f90;
+// f8 has input, then output
+//
+// Overview of operation
+//==============================================================
+// There are seven paths
+// 1. 0 < |x| < 0.25 SINH_BY_POLY
+// 2. 0.25 <=|x| < 32 SINH_BY_TBL
+// 3. 32 <= |x| < 11357.21655 SINH_BY_EXP (merged path with SINH_BY_TBL)
+// 4. |x| >= 11357.21655 SINH_HUGE
+// 5. x=0 Done with early exit
+// 6. x=inf,nan Done with early exit
+// 7. x=denormal SINH_DENORM
+//
+// For double extended we get overflow for x >= 400c b174 ddc0 31ae c0ea
+// >= 11357.21655
+//
+//
+// 1. SINH_BY_POLY 0 < |x| < 0.25
+// ===============
+// Evaluate sinh(x) by a 13th order polynomial
+// Care is taken with the order of multiplication; P_1 is not exactly 1/3!,
+// P_2 is not exactly 1/5!, etc.
+// sinh(x) = sign * (series(e^x) - series(e^-x))/2
+// = sign * (ax + ax^3/3! + ax^5/5! + ax^7/7! + ax^9/9! + ax^11/11!
+// + ax^13/13!)
+// = sign * (ax + ax * ( ax^2 * (1/3! + ax^4 * (1/7! + ax^4*1/11!)) )
+// + ax * ( ax^4 * (1/5! + ax^4 * (1/9! + ax^4*1/13!)) ))
+// = sign * (ax + ax*p_odd + (ax*p_even))
+// = sign * (ax + Y_lo)
+// sinh(x) = sign * (Y_hi + Y_lo)
+// Note that ax = |x|
+//
+// 2. SINH_BY_TBL 0.25 <= |x| < 32.0
+// =============
+// sinh(x) = sinh(B+R)
+// = sinh(B)cosh(R) + cosh(B)sinh(R)
+//
+// ax = |x| = M*log2/64 + R
+// B = M*log2/64
+// M = 64*N + j
+// We will calculate M and get N as (M-j)/64
+// The division is a shift.
+// exp(B) = exp(N*log2 + j*log2/64)
+// = 2^N * 2^(j/64)
+// sinh(B) = 1/2(e^B -e^-B)
+// = 1/2(2^N * 2^(j/64) - 2^-N * 2^(-j/64))
+// sinh(B) = (2^(N-1) * 2^(j/64) - 2^(-N-1) * 2^(-j/64))
+// cosh(B) = (2^(N-1) * 2^(j/64) + 2^(-N-1) * 2^(-j/64))
+// 2^(j/64) is stored as Tjhi + Tjlo , j= -32,....,32
+// Tjhi is double-extended (80-bit) and Tjlo is single(32-bit)
+//
+// R = ax - M*log2/64
+// R = ax - M*log2_by_64_hi - M*log2_by_64_lo
+// exp(R) = 1 + R +R^2(1/2! + R(1/3! + R(1/4! + ... + R(1/n!)...)
+// = 1 + p_odd + p_even
+// where p_odd uses the A coefficients and p_even uses
+// the B coefficients
+//
+// So sinh(R) = ((1 + p_odd + p_even) - (1 - p_odd + p_even))/2 = p_odd
+// cosh(R) = 1 + p_even
+// sinh(B) = S_hi + S_lo
+// cosh(B) = C_hi
+// sinh(x) = sinh(B)cosh(R) + cosh(B)sinh(R)
+//
+// 3. SINH_BY_EXP 32.0 <= |x| < 11357.21655 ( 400c b174 ddc0 31ae c0ea )
+// ==============
+// Can approximate result by exp(x)/2 in this region.
+// Y_hi = Tjhi
+// Y_lo = Tjhi * (p_odd + p_even) + Tjlo
+// sinh(x) = Y_hi + Y_lo
+//
+// 4. SINH_HUGE |x| >= 11357.21655 ( 400c b174 ddc0 31ae c0ea )
+// ============
+// Set error tag and call error support
+//
+//
+// Assembly macros
+//==============================================================
+r_ad5 = r14 // j_lo_table midpoint address
+r_rshf_2to57 = r15 // DP bits of 1.10000 2^(63+57)
+r_exp_denorm = r15 // shares r15 with r_rshf_2to57
+r_ad_mJ_lo = r15 // shares r15 with r_rshf_2to57
+r_ad_J_lo = r16
+r_2Nm1 = r17
+r_2mNm1 = r18
+r_exp_x = r18 // shares r18 with r_2mNm1; masked exponent of x
+r_ad_J_hi = r19
+r_ad2o = r19 // shares r19 with r_ad_J_hi; odd p_table coefficients
+r_ad_mJ_hi = r20
+r_mj = r21
+r_ad2e = r22 // overflow threshold, then p_table after post-increment
+r_ad3 = r23 // ab_table address
+r_ad1 = r24 // sinh_arg_reduction address
+r_Mmj = r24 // shares r24 with r_ad1
+r_rshf = r25 // DP bits of 1.1000 2^63
+r_M = r25 // shares r25 with r_rshf
+r_N = r25 // shares r25 with r_rshf
+r_jshf = r26
+r_exp_2tom57 = r26 // shares r26 with r_jshf; exponent of 2^-57
+r_j = r26 // shares r26 with r_jshf
+r_exp_mask = r27 // 0x1ffff
+r_signexp_x = r28 // sign and exponent of x from getf.exp
+r_signexp_sgnx_0_5 = r28 // shares r28 with r_signexp_x
+r_exp_0_25 = r29 // exponent of 0.25
+r_sig_inv_ln2 = r30 // significand of 1/ln2
+r_exp_32 = r30 // shares r30 with r_sig_inv_ln2
+r_exp_huge = r30 // shares r30 with r_sig_inv_ln2
+r_ad4 = r31 // j_hi_table midpoint address
+
+GR_SAVE_PFS = r34
+GR_SAVE_B0 = r35
+GR_SAVE_GP = r36
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+// floating-point register aliases
+f_ABS_X = f9
+f_X2 = f10
+f_X4 = f11
+f_tmp = f14
+f_RSHF = f15
+
+f_Inv_log2by64 = f32
+f_log2by64_lo = f33
+f_log2by64_hi = f34
+f_A1 = f35
+
+f_A2 = f36
+f_A3 = f37
+f_Rcub = f38
+f_M_temp = f39
+f_R_temp = f40
+
+f_Rsq = f41
+f_R = f42
+f_M = f43
+f_B1 = f44
+f_B2 = f45
+
+f_B3 = f46
+f_peven_temp1 = f47
+f_peven_temp2 = f48
+f_peven = f49
+f_podd_temp1 = f50
+
+f_podd_temp2 = f51
+f_podd = f52
+f_poly65 = f53
+f_poly6543 = f53 // shares f53 with f_poly65
+f_poly6to1 = f53 // shares f53 with f_poly65
+f_poly43 = f54
+f_poly21 = f55
+
+f_X3 = f56
+f_INV_LN2_2TO63 = f57
+f_RSHF_2TO57 = f58
+f_2TOM57 = f59
+f_smlst_oflow_input = f60
+
+f_pre_result = f61
+f_huge = f62 // NOTE(review): f_huge is assigned again (f89) further down
+f_spos = f63
+f_sneg = f64
+f_Tjhi = f65
+
+f_Tjlo = f66
+f_Tmjhi = f67
+f_Tmjlo = f68
+f_S_hi = f69
+f_SC_hi_temp = f70
+
+f_S_lo_temp1 = f71
+f_S_lo_temp2 = f72
+f_S_lo_temp3 = f73
+f_S_lo_temp4 = f73 // shares f73 with f_S_lo_temp3
+f_S_lo = f74
+f_C_hi = f75
+
+f_Y_hi = f77
+f_Y_lo_temp = f78
+f_Y_lo = f79
+f_NORM_X = f80
+
+f_P1 = f81
+f_P2 = f82
+f_P3 = f83
+f_P4 = f84
+f_P5 = f85
+
+f_P6 = f86
+f_Tjhi_spos = f87
+f_Tjlo_spos = f88
+f_huge = f89 // NOTE(review): second assignment of f_huge (f62 above) -- confirm which is intended
+f_signed_hi_lo = f90
+
+
+// Data tables
+//==============================================================
+
+// DO NOT CHANGE ORDER OF THESE TABLES
+RODATA
+
+.align 16
+LOCAL_OBJECT_START(sinh_arg_reduction)
+// data8 0xB8AA3B295C17F0BC, 0x00004005 // 64/log2 -- signif loaded with setf
+ data8 0xB17217F7D1000000, 0x00003FF8 // +0x00: log2/64 high part
+ data8 0xCF79ABC9E3B39804, 0x00003FD0 // +0x10: log2/64 low part
+ data8 0xb174ddc031aec0ea, 0x0000400c // +0x20: Smallest x to overflow (11357.21655)
+LOCAL_OBJECT_END(sinh_arg_reduction)
+
+LOCAL_OBJECT_START(sinh_p_table)
+ data8 0xB08AF9AE78C1239F, 0x00003FDE // P6, close to 1/13!
+ data8 0xB8EF1D28926D8891, 0x00003FEC // P4, close to 1/9!
+ data8 0x8888888888888412, 0x00003FF8 // P2, close to 1/5!
+ data8 0xD732377688025BE9, 0x00003FE5 // P5, close to 1/11! (odd group starts here, +0x30)
+ data8 0xD00D00D00D4D39F2, 0x00003FF2 // P3, close to 1/7!
+ data8 0xAAAAAAAAAAAAAAAB, 0x00003FFC // P1, close to 1/3!
+LOCAL_OBJECT_END(sinh_p_table)
+
+LOCAL_OBJECT_START(sinh_ab_table)
+ data8 0xAAAAAAAAAAAAAAAC, 0x00003FFC // A1, close to 1/3!
+ data8 0x88888888884ECDD5, 0x00003FF8 // A2, close to 1/5!
+ data8 0xD00D0C6DCC26A86B, 0x00003FF2 // A3, close to 1/7!
+ data8 0x8000000000000002, 0x00003FFE // B1, close to 1/2!
+ data8 0xAAAAAAAAAA402C77, 0x00003FFA // B2, close to 1/4!
+ data8 0xB60B6CC96BDB144D, 0x00003FF5 // B3, close to 1/6!
+LOCAL_OBJECT_END(sinh_ab_table)
+
+LOCAL_OBJECT_START(sinh_j_hi_table)
+ data8 0xB504F333F9DE6484, 0x00003FFE
+ data8 0xB6FD91E328D17791, 0x00003FFE
+ data8 0xB8FBAF4762FB9EE9, 0x00003FFE
+ data8 0xBAFF5AB2133E45FB, 0x00003FFE
+ data8 0xBD08A39F580C36BF, 0x00003FFE
+ data8 0xBF1799B67A731083, 0x00003FFE
+ data8 0xC12C4CCA66709456, 0x00003FFE
+ data8 0xC346CCDA24976407, 0x00003FFE
+ data8 0xC5672A115506DADD, 0x00003FFE
+ data8 0xC78D74C8ABB9B15D, 0x00003FFE
+ data8 0xC9B9BD866E2F27A3, 0x00003FFE
+ data8 0xCBEC14FEF2727C5D, 0x00003FFE
+ data8 0xCE248C151F8480E4, 0x00003FFE
+ data8 0xD06333DAEF2B2595, 0x00003FFE
+ data8 0xD2A81D91F12AE45A, 0x00003FFE
+ data8 0xD4F35AABCFEDFA1F, 0x00003FFE
+ data8 0xD744FCCAD69D6AF4, 0x00003FFE
+ data8 0xD99D15C278AFD7B6, 0x00003FFE
+ data8 0xDBFBB797DAF23755, 0x00003FFE
+ data8 0xDE60F4825E0E9124, 0x00003FFE
+ data8 0xE0CCDEEC2A94E111, 0x00003FFE
+ data8 0xE33F8972BE8A5A51, 0x00003FFE
+ data8 0xE5B906E77C8348A8, 0x00003FFE
+ data8 0xE8396A503C4BDC68, 0x00003FFE
+ data8 0xEAC0C6E7DD24392F, 0x00003FFE
+ data8 0xED4F301ED9942B84, 0x00003FFE
+ data8 0xEFE4B99BDCDAF5CB, 0x00003FFE
+ data8 0xF281773C59FFB13A, 0x00003FFE
+ data8 0xF5257D152486CC2C, 0x00003FFE
+ data8 0xF7D0DF730AD13BB9, 0x00003FFE
+ data8 0xFA83B2DB722A033A, 0x00003FFE
+ data8 0xFD3E0C0CF486C175, 0x00003FFE
+ data8 0x8000000000000000, 0x00003FFF // Center of table
+ data8 0x8164D1F3BC030773, 0x00003FFF
+ data8 0x82CD8698AC2BA1D7, 0x00003FFF
+ data8 0x843A28C3ACDE4046, 0x00003FFF
+ data8 0x85AAC367CC487B15, 0x00003FFF
+ data8 0x871F61969E8D1010, 0x00003FFF
+ data8 0x88980E8092DA8527, 0x00003FFF
+ data8 0x8A14D575496EFD9A, 0x00003FFF
+ data8 0x8B95C1E3EA8BD6E7, 0x00003FFF
+ data8 0x8D1ADF5B7E5BA9E6, 0x00003FFF
+ data8 0x8EA4398B45CD53C0, 0x00003FFF
+ data8 0x9031DC431466B1DC, 0x00003FFF
+ data8 0x91C3D373AB11C336, 0x00003FFF
+ data8 0x935A2B2F13E6E92C, 0x00003FFF
+ data8 0x94F4EFA8FEF70961, 0x00003FFF
+ data8 0x96942D3720185A00, 0x00003FFF
+ data8 0x9837F0518DB8A96F, 0x00003FFF
+ data8 0x99E0459320B7FA65, 0x00003FFF
+ data8 0x9B8D39B9D54E5539, 0x00003FFF
+ data8 0x9D3ED9A72CFFB751, 0x00003FFF
+ data8 0x9EF5326091A111AE, 0x00003FFF
+ data8 0xA0B0510FB9714FC2, 0x00003FFF
+ data8 0xA27043030C496819, 0x00003FFF
+ data8 0xA43515AE09E6809E, 0x00003FFF
+ data8 0xA5FED6A9B15138EA, 0x00003FFF
+ data8 0xA7CD93B4E965356A, 0x00003FFF
+ data8 0xA9A15AB4EA7C0EF8, 0x00003FFF
+ data8 0xAB7A39B5A93ED337, 0x00003FFF
+ data8 0xAD583EEA42A14AC6, 0x00003FFF
+ data8 0xAF3B78AD690A4375, 0x00003FFF
+ data8 0xB123F581D2AC2590, 0x00003FFF
+ data8 0xB311C412A9112489, 0x00003FFF
+ data8 0xB504F333F9DE6484, 0x00003FFF
+LOCAL_OBJECT_END(sinh_j_hi_table)
+
+LOCAL_OBJECT_START(sinh_j_lo_table)
+ data4 0x1EB2FB13
+ data4 0x1CE2CBE2
+ data4 0x1DDC3CBC
+ data4 0x1EE9AA34
+ data4 0x9EAEFDC1
+ data4 0x9DBF517B
+ data4 0x1EF88AFB
+ data4 0x1E03B216
+ data4 0x1E78AB43
+ data4 0x9E7B1747
+ data4 0x9EFE3C0E
+ data4 0x9D36F837
+ data4 0x9DEE53E4
+ data4 0x9E24AE8E
+ data4 0x1D912473
+ data4 0x1EB243BE
+ data4 0x1E669A2F
+ data4 0x9BBC610A
+ data4 0x1E761035
+ data4 0x9E0BE175
+ data4 0x1CCB12A1
+ data4 0x1D1BFE90
+ data4 0x1DF2F47A
+ data4 0x1EF22F22
+ data4 0x9E3F4A29
+ data4 0x1EC01A5B
+ data4 0x1E8CAC3A
+ data4 0x9DBB3FAB
+ data4 0x1EF73A19
+ data4 0x9BB795B5
+ data4 0x1EF84B76
+ data4 0x9EF5818B
+ data4 0x00000000 // Center of table
+ data4 0x1F77CACA
+ data4 0x1EF8A91D
+ data4 0x1E57C976
+ data4 0x9EE8DA92
+ data4 0x1EE85C9F
+ data4 0x1F3BF1AF
+ data4 0x1D80CA1E
+ data4 0x9D0373AF
+ data4 0x9F167097
+ data4 0x1EB70051
+ data4 0x1F6EB029
+ data4 0x1DFD6D8E
+ data4 0x9EB319B0
+ data4 0x1EBA2BEB
+ data4 0x1F11D537
+ data4 0x1F0D5A46
+ data4 0x9E5E7BCA
+ data4 0x9F3AAFD1
+ data4 0x9E86DACC
+ data4 0x9F3EDDC2
+ data4 0x1E496E3D
+ data4 0x9F490BF6
+ data4 0x1DD1DB48
+ data4 0x1E65EBFB
+ data4 0x9F427496
+ data4 0x1F283C4A
+ data4 0x1F4B0047
+ data4 0x1F130152
+ data4 0x9E8367C0
+ data4 0x9F705F90
+ data4 0x1EFB3C53
+ data4 0x1F32FB13
+LOCAL_OBJECT_END(sinh_j_lo_table)
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(sinhl)
+// Entry: x arrives in f8; every exit path leaves the result in f8.
+{ .mlx
+ getf.exp r_signexp_x = f8 // Get signexp of x, must redo if unorm
+ movl r_sig_inv_ln2 = 0xb8aa3b295c17f0bc // significand of 1/ln2
+}
+{ .mlx
+ addl r_ad1 = @ltoff(sinh_arg_reduction), gp // Linkage-table slot of table
+ movl r_rshf_2to57 = 0x4778000000000000 // 1.10000 2^(63+57)
+}
+;;
+
+{ .mfi
+ ld8 r_ad1 = [r_ad1] // Load table base address from that slot
+ fmerge.s f_ABS_X = f0,f8 // |x|: sign taken from f0, rest from x
+ mov r_exp_0_25 = 0x0fffd // Form exponent for 0.25
+}
+{ .mfi
+ nop.m 0
+ fnorm.s1 f_NORM_X = f8 // Normalized copy of x
+ mov r_exp_2tom57 = 0xffff-57 // Biased exponent of 2^-57
+}
+;;
+
+{ .mfi
+ setf.d f_RSHF_2TO57 = r_rshf_2to57 // Form const 1.100 * 2^120
+ fclass.m p10,p0 = f8, 0x0b // Test for denorm
+ mov r_exp_mask = 0x1ffff // Mask that drops the sign bit of a signexp
+}
+{ .mlx
+ setf.sig f_INV_LN2_2TO63 = r_sig_inv_ln2 // Form 1/ln2 * 2^63
+ movl r_rshf = 0x43e8000000000000 // 1.1000 2^63 for right shift
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fclass.m p7,p0 = f8, 0x07 // Test if x=0
+ nop.i 0
+}
+{ .mfi
+ setf.exp f_2TOM57 = r_exp_2tom57 // Form 2^-57 for scaling
+ nop.f 0
+ add r_ad3 = 0x90, r_ad1 // Point to ab_table
+}
+;;
+
+{ .mfi
+ setf.d f_RSHF = r_rshf // Form right shift const 1.100 * 2^63
+ fclass.m p6,p0 = f8, 0xe3 // Test if x nan, inf
+ add r_ad4 = 0x2f0, r_ad1 // Point to j_hi_table midpoint
+}
+{ .mib
+ add r_ad2e = 0x20, r_ad1 // Point to p_table
+ nop.i 0
+(p10) br.cond.spnt SINH_DENORM // Branch if x denorm
+}
+;;
+
+// Common path -- return here from SINH_DENORM if x is unorm
+SINH_COMMON:
+{ .mfi
+ ldfe f_smlst_oflow_input = [r_ad2e],16 // Overflow threshold, compared with |x| later
+ nop.f 0
+ add r_ad5 = 0x580, r_ad1 // Point to j_lo_table midpoint
+}
+{ .mib
+ ldfe f_log2by64_hi = [r_ad1],16
+ and r_exp_x = r_exp_mask, r_signexp_x // Biased exponent of x, sign removed
+(p7) br.ret.spnt b0 // Exit if x=0
+}
+;;
+
+// Get the A coefficients for SINH_BY_TBL
+{ .mfi
+ ldfe f_A1 = [r_ad3],16
+ fcmp.lt.s1 p8,p9 = f8,f0 // Test for x<0
+ cmp.lt p7,p0 = r_exp_x, r_exp_0_25 // Test |x| < 0.25 (exponent compare)
+}
+{ .mfb
+ add r_ad2o = 0x30, r_ad2e // Point to p_table odd coeffs
+(p6) fma.s0 f8 = f8,f1,f0 // Result for x nan, inf
+(p6) br.ret.spnt b0 // Exit for x nan, inf
+}
+;;
+
+// Calculate X2 = x*x (from normalized x) for SINH_BY_POLY
+{ .mfi
+ ldfe f_log2by64_lo = [r_ad1],16
+ nop.f 0
+ nop.i 0
+}
+{ .mfb
+ ldfe f_A2 = [r_ad3],16
+ fma.s1 f_X2 = f_NORM_X, f_NORM_X, f0
+(p7) br.cond.spnt SINH_BY_POLY // Branch if |x| < 0.25
+}
+;;
+
+// Here if |x| >= 0.25
+SINH_BY_TBL:
+// ******************************************************
+// STEP 1 (TBL and EXP) - Argument reduction
+// ******************************************************
+// Get the following constants.
+// Inv_log2by64
+// log2by64_hi
+// log2by64_lo
+
+
+// We want 2^(N-1) and 2^(-N-1). So bias N-1 and -N-1 and
+// put them in an exponent.
+// f_spos = 2^(N-1) and f_sneg = 2^(-N-1)
+// 0xffff + (N-1) = 0xffff +N -1
+// 0xffff - (N +1) = 0xffff -N -1
+
+
+// Calculate M and keep it as integer and floating point.
+// M = round-to-integer(x*Inv_log2by64)
+// f_M = M = round-to-integer(ax/(log2/64)), via the right-shift constant
+// Put the integer representation of M in r_M
+// and the floating point representation of M in f_M
+
+// Get the remaining A,B coefficients
+{ .mmi
+ ldfe f_A3 = [r_ad3],16
+ nop.m 0
+ nop.i 0
+}
+;;
+
+.pred.rel "mutex",p8,p9
+// Use constant (1.100*2^(63-6)) to get rounded M into rightmost significand
+// |x| * 64 * 1/ln2 * 2^(63-6) + 1.1000 * 2^(63+(63-6))
+{ .mfi
+(p8) mov r_signexp_sgnx_0_5 = 0x2fffe // signexp of -0.5
+ fma.s1 f_M_temp = f_ABS_X, f_INV_LN2_2TO63, f_RSHF_2TO57
+(p9) mov r_signexp_sgnx_0_5 = 0x0fffe // signexp of +0.5
+}
+;;
+
+// Test for |x| >= overflow limit
+{ .mfi
+ ldfe f_B1 = [r_ad3],16
+ fcmp.ge.s1 p6,p0 = f_ABS_X, f_smlst_oflow_input
+ nop.i 0
+}
+;;
+
+{ .mfi
+ ldfe f_B2 = [r_ad3],16
+ nop.f 0
+ mov r_exp_32 = 0x10004 // Biased exponent of 32 = 2^5
+}
+;;
+
+// Subtract RSHF constant to get rounded M as a floating point value
+// M_temp * 2^-(63-6) - 1.1000 * 2^63
+{ .mfb
+ ldfe f_B3 = [r_ad3],16
+ fms.s1 f_M = f_M_temp, f_2TOM57, f_RSHF
+(p6) br.cond.spnt SINH_HUGE // Branch if result will overflow
+}
+;;
+
+{ .mfi
+ getf.sig r_M = f_M_temp // Integer M from the significand of M_temp
+ nop.f 0
+ cmp.ge p7,p6 = r_exp_x, r_exp_32 // Test if |x| >= 32
+}
+;;
+
+// Calculate j. j is the signed extension of the six lsb of M. It
+// has a range of -32 thru 31.
+
+// Calculate R
+// ax - M*log2by64_hi
+// R = (ax - M*log2by64_hi) - M*log2by64_lo
+
+{ .mfi
+ nop.m 0
+ fnma.s1 f_R_temp = f_M, f_log2by64_hi, f_ABS_X // ax - M*log2by64_hi
+ and r_j = 0x3f, r_M // Six lsb of M
+}
+;;
+
+{ .mii
+ nop.m 0
+ shl r_jshf = r_j, 0x2 // Shift j so can sign extend it
+;;
+ sxt1 r_jshf = r_jshf // Sign extend from bit 7
+}
+;;
+
+{ .mii
+ nop.m 0
+ shr r_j = r_jshf, 0x2 // Now j has range -32 to 31
+ nop.i 0
+}
+;;
+
+{ .mmi
+ shladd r_ad_J_hi = r_j, 4, r_ad4 // pointer to Tjhi
+ sub r_Mmj = r_M, r_j // M-j
+ sub r_mj = r0, r_j // Form -j
+}
+;;
+
+// The TBL and EXP branches are merged and predicated
+// If TBL, p6 true, 0.25 <= |x| < 32
+// If EXP, p7 true, 32 <= |x| < overflow_limit
+//
+// N = (M-j)/64
+{ .mfi
+ ldfe f_Tjhi = [r_ad_J_hi]
+ fnma.s1 f_R = f_M, f_log2by64_lo, f_R_temp // R = R_temp - M*log2by64_lo
+ shr r_N = r_Mmj, 0x6 // N = (M-j)/64
+}
+{ .mfi
+ shladd r_ad_mJ_hi = r_mj, 4, r_ad4 // pointer to Tmjhi
+ nop.f 0
+ shladd r_ad_mJ_lo = r_mj, 2, r_ad5 // pointer to Tmjlo
+}
+;;
+
+{ .mfi
+ sub r_2mNm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(-N-1)
+ nop.f 0
+ shladd r_ad_J_lo = r_j, 2, r_ad5 // pointer to Tjlo
+}
+{ .mfi
+ ldfe f_Tmjhi = [r_ad_mJ_hi]
+ nop.f 0
+ add r_2Nm1 = r_signexp_sgnx_0_5, r_N // signexp sgnx*2^(N-1)
+}
+;;
+
+{ .mmf
+ ldfs f_Tmjlo = [r_ad_mJ_lo]
+ setf.exp f_sneg = r_2mNm1 // Form sgnx * 2^(-N-1)
+ nop.f 0
+}
+;;
+
+{ .mmf
+ ldfs f_Tjlo = [r_ad_J_lo]
+ setf.exp f_spos = r_2Nm1 // Form sgnx * 2^(N-1)
+ nop.f 0
+}
+;;
+
+// ******************************************************
+// STEP 2 (TBL and EXP)
+// ******************************************************
+// Calculate Rsquared and Rcubed in preparation for p_even and p_odd
+
+{ .mmf
+ nop.m 0
+ nop.m 0
+ fma.s1 f_Rsq = f_R, f_R, f0
+}
+;;
+
+
+// Calculate p_even
+// B_2 + Rsq *B_3
+// B_1 + Rsq * (B_2 + Rsq *B_3)
+// p_even = Rsq * (B_1 + Rsq * (B_2 + Rsq *B_3))
+{ .mfi
+ nop.m 0
+ fma.s1 f_peven_temp1 = f_Rsq, f_B3, f_B2
+ nop.i 0
+}
+// Calculate p_odd
+// A_2 + Rsq *A_3
+// A_1 + Rsq * (A_2 + Rsq *A_3)
+// podd = R + Rcub * (A_1 + Rsq * (A_2 + Rsq *A_3))
+{ .mfi
+ nop.m 0
+ fma.s1 f_podd_temp1 = f_Rsq, f_A3, f_A2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_Rcub = f_Rsq, f_R, f0
+ nop.i 0
+}
+;;
+
+//
+// If TBL,
+// Calculate S_hi and S_lo, and C_hi
+// SC_hi_temp = sneg * Tmjhi
+// S_hi = spos * Tjhi - SC_hi_temp
+// S_hi = spos * Tjhi - (sneg * Tmjhi)
+// C_hi = spos * Tjhi + SC_hi_temp
+// C_hi = spos * Tjhi + (sneg * Tmjhi)
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_SC_hi_temp = f_sneg, f_Tmjhi, f0
+ nop.i 0
+}
+;;
+
+// If TBL,
+// S_lo_temp3 = sneg * Tmjlo
+// S_lo_temp4 = spos * Tjlo - S_lo_temp3
+// S_lo_temp4 = spos * Tjlo -(sneg * Tmjlo)
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_S_lo_temp3 = f_sneg, f_Tmjlo, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_peven_temp2 = f_Rsq, f_peven_temp1, f_B1
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_podd_temp2 = f_Rsq, f_podd_temp1, f_A1
+ nop.i 0
+}
+;;
+
+// If EXP,
+// Compute sgnx * 2^(N-1) * Tjhi and sgnx * 2^(N-1) * Tjlo
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Tjhi_spos = f_Tjhi, f_spos, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Tjlo_spos = f_Tjlo, f_spos, f0
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fms.s1 f_S_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_C_hi = f_spos, f_Tjhi, f_SC_hi_temp
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p6) fms.s1 f_S_lo_temp4 = f_spos, f_Tjlo, f_S_lo_temp3
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_peven = f_Rsq, f_peven_temp2, f0
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_podd = f_podd_temp2, f_Rcub, f_R
+ nop.i 0
+}
+;;
+
+// If TBL,
+// S_lo_temp1 = spos * Tjhi - S_hi
+// S_lo_temp2 = -sneg * Tmjhi + S_lo_temp1
+// S_lo_temp2 = -sneg * Tmjhi + (spos * Tjhi - S_hi)
+
+{ .mfi
+ nop.m 0
+(p6) fms.s1 f_S_lo_temp1 = f_spos, f_Tjhi, f_S_hi
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fnma.s1 f_S_lo_temp2 = f_sneg, f_Tmjhi, f_S_lo_temp1
+ nop.i 0
+}
+;;
+
+// If EXP,
+// Y_hi = sgnx * 2^(N-1) * Tjhi
+// Y_lo = sgnx * 2^(N-1) * Tjhi * (p_odd + p_even) + sgnx * 2^(N-1) * Tjlo
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Y_lo_temp = f_peven, f1, f_podd
+ nop.i 0
+}
+;;
+
+// If TBL,
+// S_lo = S_lo_temp4 + S_lo_temp2
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_S_lo = f_S_lo_temp4, f1, f_S_lo_temp2
+ nop.i 0
+}
+;;
+
+// If TBL,
+// Y_hi = S_hi
+// Y_lo = C_hi*p_odd + (S_hi*p_even + S_lo)
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_Y_lo_temp = f_S_hi, f_peven, f_S_lo
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p7) fma.s1 f_Y_lo = f_Tjhi_spos, f_Y_lo_temp, f_Tjlo_spos
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 f_tmp = f_B2, f_B2
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p6) fma.s1 f_Y_lo = f_C_hi, f_podd, f_Y_lo_temp
+ nop.i 0
+}
+;;
+
+// EXP case: f8 = answer = Y_hi + Y_lo, with Y_hi = Tjhi_spos
+{ .mfi
+ nop.m 0
+(p7) fma.s0 f8 = f_Y_lo, f1, f_Tjhi_spos
+ nop.i 0
+}
+;;
+
+// TBL case: f8 = answer = Y_hi + Y_lo, with Y_hi = S_hi
+{ .mfb
+ nop.m 0
+(p6) fma.s0 f8 = f_Y_lo, f1, f_S_hi
+ br.ret.sptk b0 // Exit for SINH_BY_TBL and SINH_BY_EXP
+}
+;;
+
+
+// Here if 0 < |x| < 0.25
+SINH_BY_POLY:
+{ .mmf
+ ldfe f_P6 = [r_ad2e],16 // Even coeffs P6,P4,P2 come through r_ad2e
+ ldfe f_P5 = [r_ad2o],16 // Odd coeffs P5,P3,P1 come through r_ad2o
+ nop.f 0
+}
+;;
+
+{ .mmi
+ ldfe f_P4 = [r_ad2e],16
+ ldfe f_P3 = [r_ad2o],16
+ nop.i 0
+}
+;;
+
+{ .mmi
+ ldfe f_P2 = [r_ad2e],16
+ ldfe f_P1 = [r_ad2o],16
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_X3 = f_NORM_X, f_X2, f0 // x^3
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_X4 = f_X2, f_X2, f0 // x^4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly65 = f_X2, f_P6, f_P5 // P5 + x^2*P6
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly43 = f_X2, f_P4, f_P3 // P3 + x^2*P4
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly21 = f_X2, f_P2, f_P1 // P1 + x^2*P2
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly6543 = f_X4, f_poly65, f_poly43 // poly43 + x^4*poly65
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s1 f_poly6to1 = f_X4, f_poly6543, f_poly21 // poly21 + x^4*poly6543
+ nop.i 0
+}
+;;
+
+// Dummy multiply to generate inexact
+{ .mfi
+ nop.m 0
+ fmpy.s0 f_tmp = f_P6, f_P6
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+ fma.s0 f8 = f_poly6to1, f_X3, f_NORM_X // result = x + x^3*poly6to1
+ br.ret.sptk b0 // Exit SINH_BY_POLY
+}
+;;
+
+
+// Here if x denorm or unorm
+SINH_DENORM:
+// Determine if x really a denorm and not a unorm
+{ .mmf
+ getf.exp r_signexp_x = f_NORM_X // Redo signexp, now from normalized x
+ mov r_exp_denorm = 0x0c001 // Real denorms have exp < this
+ fmerge.s f_ABS_X = f0, f_NORM_X // Redo |x| from normalized x
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fcmp.eq.s0 p10,p0 = f8, f0 // Set denorm flag
+ nop.i 0
+}
+;;
+
+// Set p8 if really a denorm
+{ .mmi
+ and r_exp_x = r_exp_mask, r_signexp_x // Exponent with sign bit removed
+;;
+ cmp.lt p8,p9 = r_exp_x, r_exp_denorm
+ nop.i 0
+}
+;;
+
+// Identify denormal operands.
+{ .mfb
+ nop.m 0
+(p8) fcmp.ge.unc.s1 p6,p7 = f8, f0 // Test sign of denorm
+(p9) br.cond.sptk SINH_COMMON // Return to main path if x unorm
+}
+;;
+
+{ .mfi
+ nop.m 0
+(p6) fma.s0 f8 = f8,f8,f8 // If x +denorm, result=x+x^2
+ nop.i 0
+}
+{ .mfb
+ nop.m 0
+(p7) fnma.s0 f8 = f8,f8,f8 // If x -denorm, result=x-x^2
+ br.ret.sptk b0 // Exit if x denorm
+}
+;;
+
+
+// Here if |x| >= overflow limit
+SINH_HUGE:
+// for SINH_HUGE, put 24000 in exponent; take sign from input
+{ .mmi
+ mov r_exp_huge = 0x15dbf // 0xffff + 24000
+;;
+ setf.exp f_huge = r_exp_huge // f_huge = 2^24000
+ nop.i 0
+}
+;;
+
+.pred.rel "mutex",p8,p9
+{ .mfi
+ alloc r32 = ar.pfs,0,5,4,0 // 5 locals, 4 outputs for the error call
+(p8) fnma.s1 f_signed_hi_lo = f_huge, f1, f1 // x<0: 1 - huge
+ nop.i 0
+}
+{ .mfi
+ nop.m 0
+(p9) fma.s1 f_signed_hi_lo = f_huge, f1, f1 // x>=0: 1 + huge
+ nop.i 0
+}
+;;
+
+{ .mfi
+ nop.m 0
+ fma.s0 f_pre_result = f_signed_hi_lo, f_huge, f0 // Signed huge product, computed in sf0
+ mov GR_Parameter_TAG = 126 // Tag handed to __libm_error_support
+}
+;;
+// No branch here: control presumably continues into __libm_error_region below
+GLOBAL_IEEE754_END(sinhl)
+libm_alias_ldouble_other (__sinh, sinh)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+// Frame after the sp adjustment: X at sp+16, Y at sp+32, RESULT at sp+48
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+{ .mmi
+ stfe [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (0.0) on stack, advance 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+{ .mib
+ stfe [GR_Parameter_X] = f8 // STORE Parameter 1 (x) on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = f_pre_result // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+
+{ .mmi
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+ nop.m 0
+ nop.i 0
+};;
+
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrt.S b/sysdeps/ia64/fpu/e_sqrt.S
new file mode 100644
index 0000000000..3b9c17059e
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrt.S
@@ -0,0 +1,343 @@
+.file "sqrt.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//********************************************************************
+// History
+//********************************************************************
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+//********************************************************************
+//
+// Function: Combined sqrt(x), where
+// _
+// sqrt(x) = |x, for double precision x values
+//
+//********************************************************************
+//
+// Accuracy: Correctly Rounded
+//
+//********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f6, f7, f9 -f15
+//
+// General Purpose Registers:
+// r32-r36 (Locals)
+// r37-r40 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6, p7, p8
+//
+//*********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// sqrt(QNaN) = QNaN
+// sqrt(SNaN) = QNaN
+// sqrt(+/-0) = +/-0
+// sqrt(negative) = QNaN and error handling is called
+//
+//*********************************************************************
+//
+// Implementation:
+//
+// Modified Newton-Raphson Algorithm
+//
+//*********************************************************************
+
+GR_SAVE_PFS = r33
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(sqrt)
+{ .mfi
+ alloc r32= ar.pfs,0,5,4,0
+ frsqrta.s0 f7,p6=f8
+ nop.i 0
+} { .mlx
+ // BEGIN DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+ nop.m 0
+ // exponent of +1/2 in r2
+ movl r2 = 0x0fffe;;
+} { .mmi
+ // +1/2 in f9
+ setf.exp f9 = r2
+ nop.m 0
+ nop.i 0
+} { .mlx
+ nop.m 0
+ // 3/2 in r3
+ movl r3=0x3fc00000;;
+} { .mfi
+ setf.s f10=r3
+ // Step (1)
+ // y0 = 1/sqrt(a) in f7 (from frsqrta above); here p7 = x negative, nonzero
+ fclass.m.unc p7,p8 = f8,0x3A
+ nop.i 0;;
+} { .mlx
+ nop.m 0
+ // 5/2 in r2
+ movl r2 = 0x40200000
+} { .mlx
+ nop.m 0
+ // 63/8 in r3
+ movl r3 = 0x40fc0000;;
+} { .mfi
+ setf.s f11=r2
+ // Step (2)
+ // h = +1/2 * y0 in f6
+ (p6) fma.s1 f6=f9,f7,f0
+ nop.i 0
+} { .mfi
+ setf.s f12=r3
+ // Step (3)
+ // g = a * y0 in f7
+ (p6) fma.s1 f7=f8,f7,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ mov f15 = f8 // Keep original x; the error path stores f15
+ nop.i 0;;
+} { .mlx
+ nop.m 0
+ // 231/16 in r2
+ movl r2 = 0x41670000;;
+} { .mfi
+ setf.s f13=r2
+ // Step (4)
+ // e = 1/2 - g * h in f9
+ (p6) fnma.s1 f9=f7,f6,f9
+ nop.i 0
+} { .mlx
+ nop.m 0
+ // 35/8 in r3
+ movl r3 = 0x408c0000;;
+} { .mfi
+ setf.s f14=r3
+ // Step (5)
+ // S = 3/2 + 5/2 * e in f10
+ (p6) fma.s1 f10=f11,f9,f10
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // e2 = e * e in f11
+ (p6) fma.s1 f11=f9,f9,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (7)
+ // t = 63/8 + 231/16 * e in f12
+ (p6) fma.s1 f12=f13,f9,f12
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (8)
+ // S1 = e + e2 * S in f10
+ (p6) fma.s1 f10=f11,f10,f9
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (9)
+ // e4 = e2 * e2 in f11
+ (p6) fma.s1 f11=f11,f11,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (10)
+ // t1 = 35/8 + e * t in f9
+ (p6) fma.s1 f9=f9,f12,f14
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (11)
+ // G = g + S1 * g in f12
+ (p6) fma.s1 f12=f10,f7,f7
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (12)
+ // E = g * e4 in f7
+ (p6) fma.s1 f7=f7,f11,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (13)
+ // u = S1 + e4 * t1 in f10
+ (p6) fma.s1 f10=f11,f9,f10
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (14)
+ // g1 = G + t1 * E in f7
+ (p6) fma.d.s1 f7=f9,f7,f12
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (15)
+ // h1 = h + u * h in f6
+ (p6) fma.s1 f6=f10,f6,f6
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (16)
+ // d = a - g1 * g1 in f9
+ (p6) fnma.s1 f9=f7,f7,f8
+ nop.i 0;;
+} { .mfb
+ nop.m 0
+ // Step (17)
+ // g2 = g1 + d * h1 in f8 (final result, rounded to double in sf0)
+ (p6) fma.d.s0 f8=f9,f6,f7
+ (p6) br.ret.sptk b0 ;;
+}
+
+{ .mfb
+ nop.m 0
+ mov f8 = f7 // p6 clear: result is what frsqrta left in f7
+ (p8) br.ret.sptk b0 ;;
+}
+{ .mfb
+ (p7) mov r40 = 49 // Error tag passed in r40
+ nop.f 0
+ (p7) br.cond.sptk __libm_error_region ;;
+}
+// END DOUBLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+GLOBAL_IEEE754_END(sqrt)
+libm_alias_double_other (__sqrt, sqrt)
+libm_alias_double_narrow (__sqrt, sqrt)
+
+// Stack operations when calling error support.
+// (1) (2) (3) (call) (4)
+// sp -> + psp -> + psp -> + sp -> +
+// | | | |
+// | | <- GR_Y R3 ->| <- GR_RESULT | -> f8
+// | | | |
+// | <-GR_Y Y2->| Y2 ->| <- GR_Y |
+// | | | |
+// | | <- GR_X X1 ->| |
+// | | | |
+// sp-64 -> + sp -> + sp -> + +
+// save ar.pfs save b0 restore gp
+// save gp restore ar.pfs
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+
+//
+// Reached from sqrt only under p7 (fclass mask 0x3A on x), with the tag
+// already in r40, the original x in f15 and the frsqrta result in f8.
+//
+
+.prologue
+// Build the three stack arguments and let
+// __libm_error_support do whatever error reporting is needed.
+
+// (1)
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+
+
+// (2)
+{ .mmi
+ stfd [GR_Parameter_Y] = f0,16 // STORE Parameter 2 (0.0) on stack, advance 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+
+.body
+// (3)
+{ .mib
+ stfd [GR_Parameter_X] = f15 // STORE Parameter 1 (original x) on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfd [GR_Parameter_Y] = f8 // STORE Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+};;
+
+// (4)
+{ .mmi
+ ldfd f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrtf.S b/sysdeps/ia64/fpu/e_sqrtf.S
new file mode 100644
index 0000000000..df535d6129
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrtf.S
@@ -0,0 +1,260 @@
+.file "sqrtf.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//*********************************************************************
+// History:
+//
+// 02/02/00 Initial version
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+//*********************************************************************
+//
+// Function: Combined sqrtf(x), where
+// _
+// sqrtf(x) = |x, for single precision x values
+//
+//********************************************************************
+//
+// Accuracy: Correctly Rounded
+//
+//********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f7 -f14
+//
+// General Purpose Registers:
+// r32-r36 (Locals)
+// r37-r40 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6, p7, p8
+//
+//********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// sqrtf(QNaN) = QNaN
+// sqrtf(SNaN) = QNaN
+// sqrtf(+/-0) = +/-0
+// sqrtf(negative) = QNaN and error handling is called
+//
+//********************************************************************
+//
+// Implementation:
+//
+// Modified Newton-Raphson Algorithm
+//
+//********************************************************************
+
+
+GR_SAVE_B0 = r34
+GR_SAVE_PFS = r33
+GR_SAVE_GP = r35
+
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f13
+FR_Y = f0
+FR_RESULT = f8
+
+
+.section .text
+GLOBAL_IEEE754_ENTRY(sqrtf)
+{ .mlx
+ // BEGIN SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+ alloc r32= ar.pfs,0,5,4,0
+ // exponent of +1/2 in r2
+ movl r2 = 0x0fffe
+} { .mfi
+ // Step (1): y0 = 1/sqrt(a) in f7, p6 set when the iteration should run
+ nop.m 0
+ frsqrta.s0 f7,p6=f8
+ nop.i 0;;
+} { .mfi
+ setf.exp f12 = r2 // +1/2 in f12
+ // Classify x for the special-case exits:
+ // p7 = x negative and nonzero, p8 = everything else
+ fclass.m.unc p7,p8 = f8,0x3A
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Copy of x in f13 (FR_X) for the error path
+ mov f13 = f8
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (2)
+ // H0 = 1/2 * y0 in f9
+ (p6) fma.s1 f9=f12,f7,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // S0 = a * y0 in f7
+ (p6) fma.s1 f7=f8,f7,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (4)
+ // d = 1/2 - S0 * H0 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (0'')
+ // 3/2 = 1 + 1/2 in f12
+ (p6) fma.s1 f12=f12,f1,f1
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (5)
+ // e = 1 + 3/2 * d in f12
+ (p6) fma.s1 f12=f12,f10,f1
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // T0 = d * S0 in f11
+ (p6) fma.s1 f11=f10,f7,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (7)
+ // G0 = d * H0 in f10
+ (p6) fma.s1 f10=f10,f9,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (8)
+ // S1 = S0 + e * T0 in f7
+ (p6) fma.s.s1 f7=f12,f11,f7
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (9)
+ // H1 = H0 + e * G0 in f12
+ (p6) fma.s1 f12=f12,f10,f9
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (10)
+ // d1 = a - S1 * S1 in f9
+ (p6) fnma.s1 f9=f7,f7,f8
+ nop.i 0;;
+} { .mfb
+ nop.m 0
+ // Step (11)
+ // S = S1 + d1 * H1 in f8 (final result, rounded to single in sf0)
+ (p6) fma.s.s0 f8=f9,f12,f7
+ (p6) br.ret.sptk b0 ;;
+// END SINGLE PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+} { .mfb
+ nop.m 0
+ mov f8 = f7 // p6 clear: result is what frsqrta left in f7
+ (p8) br.ret.sptk b0 ;;
+}
+//
+// The p8 return above covers all those special values that are not
+// negative, with the result equal to frsqrta(x)
+//
+GLOBAL_IEEE754_END(sqrtf)
+libm_alias_float_other (__sqrt, sqrt)
+
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mii
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ mov GR_Parameter_TAG = 50 // Tag handed to __libm_error_support
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfs [GR_Parameter_Y] = FR_Y,16 // Store Parameter 2 (f0) on stack, advance 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfs [GR_Parameter_X] = FR_X // Store Parameter 1 (copy of x) on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfs [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+};;
+{ .mmi
+ ldfs f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region)
+
+
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/e_sqrtf128.c b/sysdeps/ia64/fpu/e_sqrtf128.c
new file mode 100644
index 0000000000..d5bcb79aaa
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrtf128.c
@@ -0,0 +1 @@
+#include
diff --git a/sysdeps/ia64/fpu/e_sqrtl.S b/sysdeps/ia64/fpu/e_sqrtl.S
new file mode 100644
index 0000000000..a037342768
--- /dev/null
+++ b/sysdeps/ia64/fpu/e_sqrtl.S
@@ -0,0 +1,275 @@
+.file "sqrtl.s"
+
+
+// Copyright (c) 2000 - 2003, Intel Corporation
+// All rights reserved.
+//
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// * The name of Intel Corporation may not be used to endorse or promote
+// products derived from this software without specific prior written
+// permission.
+
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Intel Corporation is the author of this code, and requests that all
+// problem reports or change requests be submitted to it directly at
+// http://www.intel.com/software/products/opensource/libraries/num.htm.
+//
+//********************************************************************
+//
+// History:
+// 02/02/00 (hand-optimized)
+// 04/04/00 Unwind support added
+// 08/15/00 Bundle added after call to __libm_error_support to properly
+// set [the previously overwritten] GR_Parameter_RESULT.
+// 05/20/02 Cleaned up namespace and sf0 syntax
+// 02/10/03 Reordered header: .section, .global, .proc, .align
+//
+//********************************************************************
+//
+// Function: Combined sqrtl(x), where
+// _
+// sqrtl(x) = |x, for double-extended precision x values
+//
+//********************************************************************
+//
+// Resources Used:
+//
+// Floating-Point Registers: f8 (Input and Return Value)
+// f7, f9 -f15
+//
+// General Purpose Registers:
+// r32-r36 (Locals)
+// r37-r40 (Used to pass arguments to error handling routine)
+//
+// Predicate Registers: p6, p7, p8
+//
+//********************************************************************
+//
+// IEEE Special Conditions:
+//
+// All faults and exceptions should be raised correctly.
+// sqrtl(QNaN) = QNaN
+// sqrtl(SNaN) = QNaN
+// sqrtl(+/-0) = +/-0
+// sqrtl(negative) = QNaN and error handling is called
+//
+//********************************************************************
+//
+// Implementation:
+//
+// Modified Newton-Raphson Algorithm
+//
+//********************************************************************
+
+GR_SAVE_PFS = r33
+GR_SAVE_B0 = r34
+GR_SAVE_GP = r35
+GR_Parameter_X = r37
+GR_Parameter_Y = r38
+GR_Parameter_RESULT = r39
+GR_Parameter_TAG = r40
+
+FR_X = f15
+FR_Y = f0
+FR_RESULT = f8
+
+.section .text
+GLOBAL_IEEE754_ENTRY(sqrtl)
+{ .mlx
+alloc r32= ar.pfs,0,5,4,0
+ // exponent of +1/2 in r2
+ movl r2 = 0x0fffe;;
+} { .mfi
+ // +1/2 in f12
+ setf.exp f12 = r2
+ // Step (1)
+ // y0 = 1/sqrt(a) in f7
+ frsqrta.s0 f7,p6=f8
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (2)
+ // H0 = +1/2 * y0 in f9
+ (p6) fma.s1 f9=f12,f7,f0
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (3)
+ // S0 = a * y0 in f7
+ (p6) fma.s1 f7=f8,f7,f0
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Copy of x in f13 (not read again in this routine; FR_X is f15)
+ mov f13=f8
+ nop.i 0
+} { .mfi
+ nop.m 0
+ fclass.m.unc p7,p8 = f8,0x3A // p7 = x negative and nonzero, p8 = otherwise
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (4)
+ // d0 = 1/2 - S0 * H0 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0;;
+}
+{ .mfi
+ nop.m 0
+ mov f15=f8 // Copy of x in f15 (FR_X) for the error path
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (5)
+ // H1 = H0 + d0 * H0 in f9
+ (p6) fma.s1 f9=f10,f9,f9
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (6)
+ // S1 = S0 + d0 * S0 in f7
+ (p6) fma.s1 f7=f10,f7,f7
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (7)
+ // d1 = 1/2 - S1 * H1 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (8)
+ // H2 = H1 + d1 * H1 in f9
+ (p6) fma.s1 f9=f10,f9,f9
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (9)
+ // S2 = S1 + d1 * S1 in f7
+ (p6) fma.s1 f7=f10,f7,f7
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (10)
+ // d2 = 1/2 - S2 * H2 in f10
+ (p6) fnma.s1 f10=f7,f9,f12
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (11)
+ // e2 = a - S2 * S2 in f12
+ (p6) fnma.s1 f12=f7,f7,f8
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (12)
+ // S3 = S2 + e2 * H2 in f7
+ (p6) fma.s1 f7=f12,f9,f7
+ nop.i 0
+} { .mfi
+ nop.m 0
+ // Step (13)
+ // H3 = H2 + d2 * H2 in f9
+ (p6) fma.s1 f9=f10,f9,f9
+ nop.i 0;;
+} { .mfi
+ nop.m 0
+ // Step (14)
+ // e3 = a - S3 * S3 in f12
+ (p6) fnma.s1 f12=f7,f7,f8
+ nop.i 0;;
+} { .mfb
+ nop.m 0
+ // Step (15)
+ // S = S3 + e3 * H3 in f8 (final result, computed in sf0)
+ (p6) fma.s0 f8=f12,f9,f7
+ (p6) br.ret.sptk b0 ;;
+}
+{ .mfb
+ mov GR_Parameter_TAG = 48 // Tag for the error path that follows
+ mov f8 = f7 // p6 clear: result is what frsqrta left in f7
+ (p8) br.ret.sptk b0 ;;
+}
+//
+// The p8 return above covers all those special values that are not
+// negative, with the result equal to frsqrta(x)
+//
+
+
+// END DOUBLE EXTENDED PRECISION MINIMUM LATENCY SQUARE ROOT ALGORITHM
+GLOBAL_IEEE754_END(sqrtl)
+libm_alias_ldouble_other (__sqrt, sqrt)
+
+LOCAL_LIBM_ENTRY(__libm_error_region)
+.prologue
+{ .mfi
+ add GR_Parameter_Y=-32,sp // Parameter 2 address (old sp - 32)
+ nop.f 0
+.save ar.pfs,GR_SAVE_PFS
+ mov GR_SAVE_PFS=ar.pfs // Save ar.pfs
+}
+{ .mfi
+.fframe 64
+ add sp=-64,sp // Create new 64-byte stack frame
+ nop.f 0
+ mov GR_SAVE_GP=gp // Save gp
+};;
+{ .mmi
+ stfe [GR_Parameter_Y] = FR_Y,16 // Save Parameter 2 (f0) on stack, advance 16
+ add GR_Parameter_X = 16,sp // Parameter 1 address
+.save b0, GR_SAVE_B0
+ mov GR_SAVE_B0=b0 // Save b0
+};;
+.body
+{ .mib
+ stfe [GR_Parameter_X] = FR_X // Store Parameter 1 (copy of x) on stack
+ add GR_Parameter_RESULT = 0,GR_Parameter_Y // Parameter 3 address
+ nop.b 0
+}
+{ .mib
+ stfe [GR_Parameter_Y] = FR_RESULT // Store Parameter 3 on stack
+ add GR_Parameter_Y = -16,GR_Parameter_Y // Back to Parameter 2 address
+ br.call.sptk b0=__libm_error_support# // Call error handling function
+};;
+{ .mmi
+ nop.m 0
+ nop.m 0
+ add GR_Parameter_RESULT = 48,sp // Recompute result address after the call
+};;
+{ .mmi
+ ldfe f8 = [GR_Parameter_RESULT] // Get return result off stack
+.restore sp
+ add sp = 64,sp // Restore stack pointer
+ mov b0 = GR_SAVE_B0 // Restore return address
+};;
+{ .mib
+ mov gp = GR_SAVE_GP // Restore gp
+ mov ar.pfs = GR_SAVE_PFS // Restore ar.pfs
+ br.ret.sptk b0 // Return
+};;
+
+LOCAL_LIBM_END(__libm_error_region#)
+.type __libm_error_support#,@function
+.global __libm_error_support#
diff --git a/sysdeps/ia64/fpu/fclrexcpt.c b/sysdeps/ia64/fpu/fclrexcpt.c
new file mode 100644
index 0000000000..3d64dd4afb
--- /dev/null
+++ b/sysdeps/ia64/fpu/fclrexcpt.c
@@ -0,0 +1,37 @@
+/* Clear given exceptions in current floating-point environment.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+feclearexcept (int excepts)
+{
+  fenv_t fpsr;
+
+  /* Get the current state.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  /* Clear the requested flags; they sit 13 bits above the FE_* values.  */
+  fpsr &= ~(((fenv_t) ((excepts & FE_ALL_EXCEPT) << 13)));
+  /* Put the new state in effect.  */
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory");
+
+  /* Success.  */
+  return 0;
+}
+libm_hidden_def (feclearexcept)
diff --git a/sysdeps/ia64/fpu/fedisblxcpt.c b/sysdeps/ia64/fpu/fedisblxcpt.c
new file mode 100644
index 0000000000..8e39906d69
--- /dev/null
+++ b/sysdeps/ia64/fpu/fedisblxcpt.c
@@ -0,0 +1,35 @@
+/* Disable floating-point exceptions.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fedisableexcept (int excepts)
+{
+  fenv_t old_fpsr;
+  fenv_t new_fpsr;
+
+  /* Get the current fpsr.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (old_fpsr));
+  /* Setting a trap-disable bit masks that exception.  */
+  new_fpsr = old_fpsr | ((fenv_t) excepts & FE_ALL_EXCEPT);
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (new_fpsr) : "memory");
+
+  /* Return the exceptions that were enabled before (disable bit clear).  */
+  return (old_fpsr ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT;
+}
diff --git a/sysdeps/ia64/fpu/feenablxcpt.c b/sysdeps/ia64/fpu/feenablxcpt.c
new file mode 100644
index 0000000000..f25cc517f7
--- /dev/null
+++ b/sysdeps/ia64/fpu/feenablxcpt.c
@@ -0,0 +1,35 @@
+/* Enable floating-point exceptions.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+feenableexcept (int excepts)
+{
+  fenv_t old_fpsr;
+  fenv_t new_fpsr;
+
+  /* Get the current fpsr.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (old_fpsr));
+  /* Clearing a trap-disable bit enables that exception.  */
+  new_fpsr = old_fpsr & ~((fenv_t) excepts & FE_ALL_EXCEPT);
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (new_fpsr) : "memory");
+
+  /* Return the exceptions that were enabled before (disable bit clear).  */
+  return (old_fpsr ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT;
+}
diff --git a/sysdeps/ia64/fpu/fegetenv.c b/sysdeps/ia64/fpu/fegetenv.c
new file mode 100644
index 0000000000..96a4b536a2
--- /dev/null
+++ b/sysdeps/ia64/fpu/fegetenv.c
@@ -0,0 +1,30 @@
+/* Store current floating-point environment.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+__fegetenv (fenv_t *envp)
+{
+  /* The stored environment is the raw ar.fpsr value.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (*envp));
+  return 0;
+}
+libm_hidden_def (__fegetenv)
+weak_alias (__fegetenv, fegetenv)
+libm_hidden_weak (fegetenv)
diff --git a/sysdeps/ia64/fpu/fegetexcept.c b/sysdeps/ia64/fpu/fegetexcept.c
new file mode 100644
index 0000000000..416dbfc2dc
--- /dev/null
+++ b/sysdeps/ia64/fpu/fegetexcept.c
@@ -0,0 +1,29 @@
+/* Get enabled floating-point exceptions.
+ Copyright (C) 2000-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fegetexcept (void)
+{
+  fenv_t fpsr;
+
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+  /* An exception is enabled when its trap-disable bit is clear.  */
+  return (fpsr ^ FE_ALL_EXCEPT) & FE_ALL_EXCEPT;
+}
diff --git a/sysdeps/ia64/fpu/fegetmode.c b/sysdeps/ia64/fpu/fegetmode.c
new file mode 100644
index 0000000000..6a9dac8b47
--- /dev/null
+++ b/sysdeps/ia64/fpu/fegetmode.c
@@ -0,0 +1,26 @@
+/* Store current floating-point control modes. IA64 version.
+ Copyright (C) 2016-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fegetmode (femode_t *modep)
+{
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (*modep));
+  return 0;
+}
diff --git a/sysdeps/ia64/fpu/fegetround.c b/sysdeps/ia64/fpu/fegetround.c
new file mode 100644
index 0000000000..5f0ee8db3e
--- /dev/null
+++ b/sysdeps/ia64/fpu/fegetround.c
@@ -0,0 +1,28 @@
+/* Return current rounding direction.
+ Copyright (C) 1999-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <get-rounding-mode.h>
+
+int
+__fegetround (void)
+{
+  return get_rounding_mode ();
+}
+libm_hidden_def (__fegetround)
+weak_alias (__fegetround, fegetround)
+libm_hidden_weak (fegetround)
diff --git a/sysdeps/ia64/fpu/feholdexcpt.c b/sysdeps/ia64/fpu/feholdexcpt.c
new file mode 100644
index 0000000000..72b8dbbe6f
--- /dev/null
+++ b/sysdeps/ia64/fpu/feholdexcpt.c
@@ -0,0 +1,42 @@
+/* Store current floating-point environment and clear exceptions.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+__feholdexcept (fenv_t *envp)
+{
+  fenv_t fpsr;
+  /* Save the current state for the caller.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+  *envp = fpsr;
+
+  /* Set the trap disable bits, so that no exception traps.  */
+  fpsr |= FE_ALL_EXCEPT;
+
+  /* And clear the exception flags, which sit 13 bits above them.  */
+  fpsr &= ~(fenv_t) (FE_ALL_EXCEPT << 13);
+
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr));
+
+  /* Success.  */
+  return 0;
+}
+libm_hidden_def (__feholdexcept)
+weak_alias (__feholdexcept, feholdexcept)
+libm_hidden_weak (feholdexcept)
diff --git a/sysdeps/ia64/fpu/fesetenv.c b/sysdeps/ia64/fpu/fesetenv.c
new file mode 100644
index 0000000000..8d024d2652
--- /dev/null
+++ b/sysdeps/ia64/fpu/fesetenv.c
@@ -0,0 +1,42 @@
+/* Install given floating-point environment.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+__fesetenv (const fenv_t *envp)
+{
+  fenv_t env;
+
+  /* Magic encoding of default values: an ENVP with bits 62 and 63
+     both set (which will never happen for a user-space address) is
+     not a pointer at all.  Its low 62 bits are the environment value
+     itself and are used directly.  Any other ENVP is dereferenced
+     as usual.  */
+  if (((fenv_t) envp >> 62) == 0x03)
+    env = (fenv_t) envp & 0x3fffffffffffffff;
+  else
+    env = *envp;
+
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0;;" :: "r" (env));
+
+  return 0;
+}
+libm_hidden_def (__fesetenv)
+weak_alias (__fesetenv, fesetenv)
+libm_hidden_weak (fesetenv)
diff --git a/sysdeps/ia64/fpu/fesetexcept.c b/sysdeps/ia64/fpu/fesetexcept.c
new file mode 100644
index 0000000000..39ad7d3af4
--- /dev/null
+++ b/sysdeps/ia64/fpu/fesetexcept.c
@@ -0,0 +1,31 @@
+/* Set given exception flags. IA64 version.
+ Copyright (C) 2016-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fesetexcept (int excepts)
+{
+  fenv_t fpsr;
+
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+  fpsr |= ((excepts & FE_ALL_EXCEPT) << 13);
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory");
+
+  return 0;
+}
diff --git a/sysdeps/ia64/fpu/fesetmode.c b/sysdeps/ia64/fpu/fesetmode.c
new file mode 100644
index 0000000000..7078a03873
--- /dev/null
+++ b/sysdeps/ia64/fpu/fesetmode.c
@@ -0,0 +1,42 @@
+/* Install given floating-point control modes. IA64 version.
+ Copyright (C) 2016-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+#define FPSR_STATUS		0x1f80UL
+#define FPSR_STATUS_ALL		((FPSR_STATUS << 6) | (FPSR_STATUS << 19) \
+				 | (FPSR_STATUS << 32) | (FPSR_STATUS << 45))
+
+int
+fesetmode (const femode_t *modep)
+{
+  femode_t mode;
+
+  /* As in fesetenv: bits 62 and 63 set mean MODEP encodes the value.  */
+  if (((fenv_t) modep >> 62) == 0x03)
+    mode = (femode_t) modep & 0x3fffffffffffffffUL;
+  else
+    mode = *modep;
+
+  femode_t fpsr;
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+  fpsr = (fpsr & FPSR_STATUS_ALL) | (mode & ~FPSR_STATUS_ALL);
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0;;" :: "r" (fpsr));
+
+  return 0;
+}
diff --git a/sysdeps/ia64/fpu/fesetround.c b/sysdeps/ia64/fpu/fesetround.c
new file mode 100644
index 0000000000..8f900d5ef5
--- /dev/null
+++ b/sysdeps/ia64/fpu/fesetround.c
@@ -0,0 +1,42 @@
+/* Set current rounding direction.
+ Copyright (C) 1999-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+__fesetround (int round)
+{
+  fenv_t fpsr;
+
+  if (round & ~3)
+    return 1;
+
+  /* Get the current state.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  /* Replace the two-bit rounding field at bit 10 with ROUND.  */
+  fpsr = (fpsr & ~(3UL << 10)) | ((fenv_t) round << 10);
+
+  /* Put the new state in effect.  */
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory");
+
+  return 0;
+}
+libm_hidden_def (__fesetround)
+weak_alias (__fesetround, fesetround)
+libm_hidden_weak (fesetround)
diff --git a/sysdeps/ia64/fpu/feupdateenv.c b/sysdeps/ia64/fpu/feupdateenv.c
new file mode 100644
index 0000000000..75d6ac2105
--- /dev/null
+++ b/sysdeps/ia64/fpu/feupdateenv.c
@@ -0,0 +1,41 @@
+/* Install given floating-point environment and raise exceptions.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+__feupdateenv (const fenv_t *envp)
+{
+  fenv_t fpsr;
+
+  /* Get the current exception state before the new environment
+     replaces it.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  /* Install new environment.  */
+  __fesetenv (envp);
+
+  /* Raise the saved exceptions.  */
+  __feraiseexcept ((int) (fpsr >> 13) & FE_ALL_EXCEPT);
+
+  /* Success.  */
+  return 0;
+}
+libm_hidden_def (__feupdateenv)
+weak_alias (__feupdateenv, feupdateenv)
+libm_hidden_weak (feupdateenv)
diff --git a/sysdeps/ia64/fpu/fgetexcptflg.c b/sysdeps/ia64/fpu/fgetexcptflg.c
new file mode 100644
index 0000000000..38f1098dfd
--- /dev/null
+++ b/sysdeps/ia64/fpu/fgetexcptflg.c
@@ -0,0 +1,33 @@
+/* Store current representation for exceptions.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fegetexceptflag (fexcept_t *flagp, int excepts)
+{
+  fenv_t fpsr;
+
+  /* Get the current exceptions.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  *flagp = (fexcept_t) ((fpsr >> 13) & excepts & FE_ALL_EXCEPT);
+
+  /* Success.  */
+  return 0;
+}
diff --git a/sysdeps/ia64/fpu/fraiseexcpt.c b/sysdeps/ia64/fpu/fraiseexcpt.c
new file mode 100644
index 0000000000..02b0c4ead1
--- /dev/null
+++ b/sysdeps/ia64/fpu/fraiseexcpt.c
@@ -0,0 +1,79 @@
+/* Raise given exceptions.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+#include <float.h>
+#include <math.h>
+#include <signal.h>
+#include <unistd.h>
+
+int
+__feraiseexcept (int excepts)
+{
+  double tmp;
+  double dummy;
+
+  /* Raise exceptions represented by EXCEPTS.  But we must raise only
+     one signal at a time.  It is important that if the overflow/underflow
+     exception and the inexact exception are given at the same time,
+     the overflow/underflow exception precedes the inexact exception.  */
+
+  /* We do these bits in assembly to be certain GCC doesn't optimize
+     away something important.  */
+
+  /* First: invalid exception.  */
+  if (FE_INVALID & excepts)
+    {
+      /* One example of an invalid operation is 0 / 0 (f0 is 0.0).  */
+      tmp = 0;
+      __asm__ __volatile__ ("frcpa.s0 %0,p1=f0,f0" : "=f" (tmp) : : "p1" );
+    }
+
+  /* Next: division by zero, here 1 / 0 (f1 is 1.0).  */
+  if (FE_DIVBYZERO & excepts)
+    __asm__ __volatile__ ("frcpa.s0 %0,p1=f1,f0" : "=f" (tmp) : : "p1" );
+
+  /* Next: overflow.  */
+  if (FE_OVERFLOW & excepts)
+    {
+      dummy = DBL_MAX;
+      /* DBL_MAX + DBL_MAX does not fit in a double.  */
+      __asm__ __volatile__ ("fadd.d.s0 %0=%1,%1" : "=f" (dummy) : "0" (dummy));
+    }
+
+  /* Next: underflow.  */
+  if (FE_UNDERFLOW & excepts)
+    {
+      dummy = DBL_MIN;
+      /* -(DBL_MIN * DBL_MIN) + 0 is too small for a double.  */
+      __asm__ __volatile__ ("fnma.d.s0 %0=%1,%1,f0" : "=f" (tmp) : "f" (dummy));
+    }
+
+  /* Last: inexact, here DBL_MAX - 1.  */
+  if (FE_INEXACT & excepts)
+    {
+      dummy = DBL_MAX;
+      __asm__ __volatile__ ("fsub.d.s0 %0=%1,f1" : "=f" (dummy) : "0" (dummy));
+    }
+
+  /* Success.  */
+  return 0;
+}
+libm_hidden_def (__feraiseexcept)
+weak_alias (__feraiseexcept, feraiseexcept)
+libm_hidden_weak (feraiseexcept)
diff --git a/sysdeps/ia64/fpu/fsetexcptflg.c b/sysdeps/ia64/fpu/fsetexcptflg.c
new file mode 100644
index 0000000000..148f9fa64b
--- /dev/null
+++ b/sysdeps/ia64/fpu/fsetexcptflg.c
@@ -0,0 +1,39 @@
+/* Set floating-point environment exception handling.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fesetexceptflag (const fexcept_t *flagp, int excepts)
+{
+  fenv_t fpsr;
+
+  /* Get the current exception state.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  /* Clear every flag selected by EXCEPTS first.  */
+  fpsr &= ~(((fenv_t) excepts & FE_ALL_EXCEPT) << 13);
+  /* Set all the bits that were called for.  */
+  fpsr |= ((*flagp & excepts & FE_ALL_EXCEPT) << 13);
+
+  /* And store it back.  */
+  __asm__ __volatile__ ("mov.m ar.fpsr=%0" :: "r" (fpsr) : "memory");
+
+  /* Success.  */
+  return 0;
+}
diff --git a/sysdeps/ia64/fpu/ftestexcept.c b/sysdeps/ia64/fpu/ftestexcept.c
new file mode 100644
index 0000000000..ae542070c8
--- /dev/null
+++ b/sysdeps/ia64/fpu/ftestexcept.c
@@ -0,0 +1,31 @@
+/* Test exception in current environment.
+ Copyright (C) 1997-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include <fenv.h>
+
+int
+fetestexcept (int excepts)
+{
+  fenv_t fpsr;
+
+  /* Get current exceptions.  */
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  return (fpsr >> 13) & excepts & FE_ALL_EXCEPT;
+}
+libm_hidden_def (fetestexcept)
diff --git a/sysdeps/ia64/fpu/gen_import_file_list b/sysdeps/ia64/fpu/gen_import_file_list
new file mode 100644
index 0000000000..b8bd6a54f3
--- /dev/null
+++ b/sysdeps/ia64/fpu/gen_import_file_list
@@ -0,0 +1,90 @@
+#!/bin/sh
+
+libm_dir=$1
+
+import() {
+ # $1 = name (a function name or a pattern such as "atan(2)?l")
+ # $2 = source file-name, looked up under $libm_dir
+ # $3 = destination file-name
+ echo "$1 $libm_dir/$2 $3"
+}
+
+import_c() {
+ # $1 = name (currently prints the same record as import)
+ # $2 = source file-name, looked up under $libm_dir
+ # $3 = destination file-name
+ echo "$1 $libm_dir/$2 $3"
+}
+
+dummy_files="
+libm_cpu_defs.h
+libm_error_codes.h
+libm_support.h
+libm_error.c
+"
+
+for f in $dummy_files
+do
+ import_c DUMMY $f $f
+done
+
+import_c scalblnf scalblnf.c s_scalblnf.c
+
+for f in acos acosh asin atanh cosh exp2 exp10 fmod log2 pow remainder \
+ scalb sinh sqrt; do
+ for t in "" f l; do
+ import $f$t $f$t.s e_$f$t.S
+ done
+done
+
+for f in atan2 exp; do
+ for t in "" f; do
+ import $f$t $f$t.s e_$f$t.S
+ done
+done
+import "atan" atan.s s_atan.S
+import "atanf" atanf.s s_atanf.S
+import "atan(2)?l" atanl.s s_atanl.S
+import "exp(m1)?l" expl_m1.s s_expm1l.S
+
+for t in "" f l; do
+ import "log(10)?$t" log$t.s e_log$t.S
+ import tgamma$t tgamma$t.s w_tgamma$t.S
+ import "(hypot|cabs)$t" hypot$t.s e_hypot$t.S
+done
+
+for f in asinh cbrt ceil erf erfc fabs floor \
+ ilogb log1p logb modf nearbyint nextafter nexttoward \
+ rint round significand fdim fma fmax tanh trunc; do
+ for t in "" f l; do
+ import $f$t $f$t.s s_$f$t.S
+ done
+done
+
+for t in "" f l; do
+ import "(tan|cot)$t" tancot$t.s s_tan$t.S
+done
+
+for t in "" f l; do
+ import "(sin|cos)$t" sincos$t.s s_cos$t.S
+ import_c frexp$t frexp$t.c s_frexp$t.c
+ import_c ldexp$t ldexp$t.c s_ldexp$t.c
+ import_c scalbn$t scalbn$t.c s_scalbn$t.c
+done
+
+import expm1 exp_m1.s s_expm1.S
+import expm1f expf_m1.s s_expm1f.S
+
+for f in frexp frexpf frexpl reduce; do
+ import __libm_$f libm_$f.s libm_$f.S
+done
+
+for t in "" f l; do
+ import __libm_ldexp$t libm_ldexp$t.s s_libm_ldexp$t.S
+ import "(__libm_)?(sincos|cis)$t" libm_sincos$t.s libm_sincos$t.S
+ import __libm_lgamma$t libm_lgamma$t.s libm_lgamma$t.S
+ import __libm_scalbn$t libm_scalbn$t.s s_libm_scalbn$t.S
+done
+import __libm_scalblnf libm_scalblnf.s libm_scalblnf.S
+import "__libm_(sin|cos|sincos)_large" libm_sincos_large.s \
+ libm_sincos_large.S
diff --git a/sysdeps/ia64/fpu/get-rounding-mode.h b/sysdeps/ia64/fpu/get-rounding-mode.h
new file mode 100644
index 0000000000..454ef9a17b
--- /dev/null
+++ b/sysdeps/ia64/fpu/get-rounding-mode.h
@@ -0,0 +1,36 @@
+/* Return current rounding direction within libc. IA64 version.
+ Copyright (C) 1999-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef IA64_GET_ROUNDING_MODE_H
+#define IA64_GET_ROUNDING_MODE_H	1
+
+#include <fenv.h>
+
+/* Return the floating-point rounding mode, which is the two-bit
+   field at bit 10 of ar.fpsr.  */
+static inline int
+get_rounding_mode (void)
+{
+  fenv_t fpsr;
+
+  __asm__ __volatile__ ("mov.m %0=ar.fpsr" : "=r" (fpsr));
+
+  return (fpsr >> 10) & 3;
+}
+
+#endif /* get-rounding-mode.h */
diff --git a/sysdeps/ia64/fpu/import_check b/sysdeps/ia64/fpu/import_check
new file mode 100644
index 0000000000..4303d2ff37
--- /dev/null
+++ b/sysdeps/ia64/fpu/import_check
@@ -0,0 +1,81 @@
+#!/bin/sh
+
+objdir="$1"
+
+num_errors=0
+
+# Read "nm -g" output on stdin and add one to num_errors per violation.
+# Relies on the caller having set $file, which is used in the messages.
+check_syms() {
+    global_count=0
+    entry_count=0
+    while read value type name; do
+        if [ "$value" = "U" ]; then
+            # The first field was the type letter, so the name is in $type.
+            name=$type
+            # undefined symbols must start with double-underscore
+            if [ "$(expr "$name" : '\(..\)')" != "__" ]; then
+                printf "%s:\tError: undefined reference %s doesn't start with \"__\".\n" \
+                    "$(basename "$file")" "$name"
+                num_errors=$(($num_errors + 1))
+            fi
+            continue
+        fi
+
+        # Every defined symbol is an entry point.  Only non-weak (not W)
+        # symbols without a "__" prefix count as strong globals.
+        entry_count=$(($entry_count + 1))
+        if [ "$type" != "W" ] && [ "$(expr "$name" : '\(..\)')" != "__" ]; then
+            global_count=$(($global_count + 1))
+        fi
+    done
+    # Several entry points are fine only if none is a strong global.
+    if [ "$entry_count" -gt 1 ] && [ "$global_count" -gt 0 ]; then
+        printf "%s:\tError: detected %s strong  global and %s entry-points.\n" \
+            "$(basename "$file")" "$global_count" "$entry_count"
+        num_errors=$(($num_errors + 1))
+    fi
+}
+
+check_file() {  # $1 = object file; adds to num_errors on any mismatch
+    file=$1
+    # The size is the first field of the line following the .rodata header.
+    size=$(readelf -S "$file" | \
+        (sz=0; while read line; do
+            if echo $line | grep -F -q " .rodata"; then
+                read sz rest
+                break
+            fi
+         done;
+         printf "%d" 0x$sz))
+    summands=$(readelf -s "$file" | grep -F " OBJECT " | tr -s ' ' |
+        cut -f4 -d' ' | sed 's,$,+,')0
+    sum=$(($summands))
+    if [ $sum != $size ]; then
+        printf "%s:\tError: sum of objects=%s bytes, .rodata size=%s bytes\n" "$(basename "$file")" "$sum" "$size"
+        num_errors=$(($num_errors + 1))
+    fi
+    tmp=$(mktemp) || exit 1    # a file, not a pipe: check_syms must run in this shell
+    nm -g "$file" > "$tmp"
+    check_syms < "$tmp"
+    rm -f "$tmp"
+}
+
+do_checks() {
+ echo "Note: 1 error expected in w_tgammal.o due to 64-byte alignment-padding."
+ while read func_pattern src_file dst_file; do # one three-field record per stdin line
+ if [ "$(expr $dst_file : '.*\(S\)$')" = "S" ]; then # only destinations ending in S are checked
+ objfile=$(expr $dst_file : '\(.*\)[.]S$') # destination name without the .S suffix
+ check_file $objdir/$objfile.o
+ fi
+ done
+}
+
+do_checks < import_file_list
+
+if [ $num_errors -gt 0 ]; then
+ echo "FAILURE: Detected $num_errors error(s)."
+ exit 1
+fi
+echo SUCCESS
+exit 0
diff --git a/sysdeps/ia64/fpu/import_diffs b/sysdeps/ia64/fpu/import_diffs
new file mode 100644
index 0000000000..147280d5fd
--- /dev/null
+++ b/sysdeps/ia64/fpu/import_diffs
@@ -0,0 +1,7 @@
+#!/bin/sh
+do_diffs() {
+ while read func_pattern src_file dst_file; do # one three-field record per stdin line
+ diff -up $src_file $dst_file # compare each source file with its destination
+ done
+}
+do_diffs < import_file_list
diff --git a/sysdeps/ia64/fpu/import_file.awk b/sysdeps/ia64/fpu/import_file.awk
new file mode 100644
index 0000000000..9139faac12
--- /dev/null
+++ b/sysdeps/ia64/fpu/import_file.awk
@@ -0,0 +1,150 @@
+BEGIN {
+  # Header pass, driven by explicit getline before any rule runs:
+  #  1. copy lines up to the "static char cvs_id" line, which is dropped;
+  #  2. copy lines up to the "// WARRANTY DISCLAIMER" line, which is dropped;
+  #  3. skip one more line and print the redistribution conditions below.
+  # Every getline result is checked, so input that lacks a marker stops at
+  # end of file instead of printing the last line read forever.
+  if ((getline) <= 0) exit;
+  while (!match($0, "^/[/*] static char cvs_id")) {
+    print; if ((getline) <= 0) exit;
+  }
+  if ((getline) <= 0) exit;
+  while (!match($0, "^// WARRANTY DISCLAIMER")) {
+    print; if ((getline) <= 0) break;
+  }
+  if ((getline) > 0) {
+    printf \
+"// Redistribution and use in source and binary forms, with or without\n" \
+"// modification, are permitted provided that the following conditions are\n" \
+"// met:\n" \
+"//\n" \
+"// * Redistributions of source code must retain the above copyright\n" \
+"// notice, this list of conditions and the following disclaimer.\n" \
+"//\n" \
+"// * Redistributions in binary form must reproduce the above copyright\n" \
+"// notice, this list of conditions and the following disclaimer in the\n" \
+"// documentation and/or other materials provided with the distribution.\n" \
+"//\n" \
+"// * The name of Intel Corporation may not be used to endorse or promote\n" \
+"// products derived from this software without specific prior written\n" \
+"// permission.\n\n";
+    if (LICENSE_ONLY == "y") {
+      # License-only mode: copy the rest of the input unchanged right here.
+      do { print; } while ((getline) > 0);
+    }
+  }
+}
+
+/^[.]data/ {
+  # A line starting with ".data" is replaced by the RODATA macro.
+  print "RODATA"; next;
+}
+/^([a-zA-Z_0-9]*_(tb[l0-9]|Tt|[tT]able|data|low|coeffs|constants|CONSTANTS|reduction|Stirling)(_?([1-9cdimpqstPQT]+|tail))?|(Constants|Poly|coeff)_.+|(double_sin_?cos|double_cis)[fl]?_.+):/ {
+  # A label recognised as a data table is wrapped in
+  # LOCAL_OBJECT_START(label) ... LOCAL_OBJECT_END(label).
+  table_name = substr($1, 1, length($1) - 1);
+  printf "LOCAL_OBJECT_START(%s)\n", table_name;
+  # have_line is 0 once the input is exhausted, so a table that runs to the
+  # end of the file terminates both scans instead of looping forever.
+  have_line = ((getline) > 0);
+  # Copy the lines that precede the first data directive.
+  while (have_line && !match($0, "^[ \t]*data")) { print; have_line = ((getline) > 0); }
+  # Copy the run of data directives and of lines containing "//"; the line
+  # that ends the run has been consumed and is not printed.
+  while (have_line && match($0, "(//|^[ \t]*data)")) { print; have_line = ((getline) > 0); }
+  printf "LOCAL_OBJECT_END(%s)\n\n", table_name;
+  next;
+}
+/^[.]proc[ \t]+__libm_(error_region|callout)/ {
+  # Emit the entry macro for the directive, then read and drop the next line.
+  printf("LOCAL_LIBM_ENTRY(%s)\n", $2);
+  getline; next;
+}
+/^[.]endp[ \t]+__libm_(error_region|callout)/ {
+  # The .endp directive of these helpers becomes the end macro.
+  printf("LOCAL_LIBM_END(%s)\n", $2); next;
+}
+/^[.]global/ {
+  # Drop the .global line that names the function being imported (FUNC);
+  # any other .global line falls through to the rules below.
+  split($2, part, "#");
+  name = part[1];
+  if (match(name, "^" FUNC "$")) next;
+}
+/^[.]proc/ {
+  # A .proc directive naming the function being imported (FUNC) is replaced,
+  # together with its body, by <TYPE>_ENTRY(name) ... <TYPE>_END(name).
+  split($2, part, "#"); name = part[1];
+  if (match(name, "^"FUNC"$")) {
+    local_funcs=("^(" \
+                 "cis|cisf|cisl" \
+                 "|cabs|cabsf|cabsl" \
+                 "|cot|cotf|cotl" \
+                 ")$");
+    ieee754_funcs=("^(" \
+                   "atan2|atan2f|atan2l|atanl" \
+                   "|cos|cosf|cosl" \
+                   "|cosh|coshf|coshl" \
+                   "|exp|expf|expl" \
+                   "|exp10|exp10f|exp10l" \
+                   "|expm1|expm1f|expm1l" \
+                   "|fmod|fmodf|fmodl" \
+                   "|hypot|hypotf|hypotl" \
+                   "|fabs|fabsf|fabsl" \
+                   "|floor|floorf|floorl" \
+                   "|log1p|log1pf|log1pl" \
+                   "|log|log10|log10f|log10l|log2l|logf|logl" \
+                   "|remainder|remainderf|remainderl" \
+                   "|rint|rintf|rintl" \
+                   "|scalb|scalbf|scalbl" \
+                   "|sin|sinf|sinl" \
+                   "|sincos|sincosf|sincosl" \
+                   "|sinh|sinhf|sinhl" \
+                   "|sqrt|sqrtf|sqrtl" \
+                   "|tan|tanf|tanl" \
+                   ")$");
+    # Choose the macro family from the function name.
+    if (match(name, ieee754_funcs)) {
+      type="GLOBAL_IEEE754";
+    } else if (match (name, local_funcs)) {
+      type="LOCAL_LIBM";
+    } else {
+      type="GLOBAL_LIBM";
+    }
+    printf "%s_ENTRY(%s)\n", type, name;
+    # Discard input up to and including the "name:" / "name#:" label line.
+    have_line = ((getline) > 0);
+    while (have_line && !match($0, "^"name"#?:")) have_line = ((getline) > 0);
+    if (have_line) have_line = ((getline) > 0);
+    # Copy the body; .endp itself is dropped.  have_line stops both scans at EOF.
+    while (have_line && !match($0, "^[.]endp")) { print; have_line = ((getline) > 0); }
+    printf "%s_END(%s)\n", type, name;
+    # exp10, exp10f and exp10l also get a pow10* weak alias.
+    if (match(name, "^exp10[fl]?$")) {
+      t=substr(name,6)
+      printf "weak_alias (exp10%s, pow10%s)\n", t, t
+    }
+    next;
+  }
+}
+/^[a-zA-Z_]+:/ {
+  # A label line whose name matches FUNC: emit GLOBAL_LIBM_ENTRY(name), drop
+  # input up to and including the next "name:" / "name#:" label line, then
+  # copy the lines that follow until .endp.
+  split($1, part, ":"); name = part[1];
+  if (match(name, "^"FUNC"$")) {
+    printf "GLOBAL_LIBM_ENTRY(%s)\n", name;
+    # Each getline result is checked so that a missing label or .endp ends
+    # the scan at end of file instead of looping forever on the last line.
+    have_line = ((getline) > 0);
+    while (have_line && !match($0, "^"name"#?:")) have_line = ((getline) > 0);
+    if (have_line) have_line = ((getline) > 0);
+    while (have_line && !match($0, "^[.]endp")) { print; have_line = ((getline) > 0); }
+    # The .endp line and the line read after it are both discarded.
+    if (have_line) getline;
+    printf "GLOBAL_LIBM_END(%s)\n", name;
+    next;
+  }
+}
+
+{ print }	# every line not consumed by a "next" above is copied through
diff --git a/sysdeps/ia64/fpu/import_intel_libm b/sysdeps/ia64/fpu/import_intel_libm
new file mode 100644
index 0000000000..1aaa646a93
--- /dev/null
+++ b/sysdeps/ia64/fpu/import_intel_libm
@@ -0,0 +1,106 @@
+#!/bin/sh
+
+# Notes:
+
+# We don't import copysign, finite, fpclassify, isinf, isnan, and signbit
+# since our own versions are nicer and just as correct and fast (except
+# perhaps that they don't handle non-finite arguments well?).
+#
+# Also, leave out cabs for now since it doesn't seem overridable in
+# glibc.
+
+libm_dir=$1	# first argument: passed to gen_import_file_list; lgamma*.c are read from it
+
+import_s() {
+	# $1 = FUNC regex for import_file.awk, $2 = source file, $3 = destination.
+	# Expansions are quoted so regex metacharacters in $1 are never subject
+	# to pathname expansion and file names stay single arguments.
+	echo "Importing $1 from $2 -> $3"
+	rm -f -- "$3"
+	awk -f import_file.awk FUNC="$1" "$2" > "$3"
+}
+
+import_c() {
+	# $1 = name (only reported), $2 = source file, $3 = destination file.
+	# LICENSE_ONLY=y makes import_file.awk rewrite the license header and
+	# copy the rest unchanged.  File names are quoted to stay single words.
+	echo "Importing $1 from $2 -> $3"
+	rm -f -- "$3"
+	awk -f import_file.awk LICENSE_ONLY=y "$2" > "$3"
+}
+
+do_imports() {
+	# Each input line is "function-pattern source-file destination-file".
+	# Sources named *.c or *.h are handled by import_c, everything else by
+	# import_s.
+	while read func_pattern src_file dst_file; do
+		importer=import_s
+		case $src_file in
+		*.[ch]) importer=import_c ;;
+		esac
+		"$importer" "$func_pattern" "$src_file" "$dst_file"
+	done
+}
+
+# Write the list of files to import, then import every entry on it.
+./gen_import_file_list $libm_dir > import_file_list
+do_imports < import_file_list
+
+# Each of these files is removed and recreated holding only the one-line
+# comment "/* Not needed. */".
+for f in \
+	e_gamma_r.c \
+	e_gammaf_r.c \
+	e_gammal_r.c \
+	s_sincos.c \
+	s_sincosf.c \
+	s_sincosl.c \
+	t_exp.c \
+	w_acosh.c \
+	w_acoshf.c \
+	w_acoshl.c \
+	w_atanh.c \
+	w_atanhf.c \
+	w_atanhl.c \
+	w_exp10.c \
+	w_exp10f.c \
+	w_exp10l.c \
+	w_exp2.c \
+	w_exp2f.c \
+	w_exp2l.c \
+	w_expl.c \
+	w_lgamma_r.c \
+	w_lgammaf_r.c \
+	w_lgammal_r.c \
+	w_log2.c \
+	w_log2f.c \
+	w_log2l.c \
+	w_sinh.c \
+	w_sinhf.c \
+	w_sinhl.c
+do
+	rm -f $f
+	echo "/* Not needed. */" > $f
+done
+
+# These files are deleted from the current directory once the imports and
+# the placeholder files above have been written; rm -f stays silent for
+# names that do not exist.
+rm -f \
+	libm_atan2_reg.S \
+	s_ldexp.S \
+	s_ldexpf.S \
+	s_ldexpl.S \
+	s_scalbn.S \
+	s_scalbnf.S \
+	s_scalbnl.S
+
+for f in lgammaf_r.c lgammal_r.c lgamma_r.c
+do
+ import_c $f $libm_dir/$f e_$f
+done
+
+for f in lgamma.c lgammaf.c lgammal.c
+do
+ import_c $f $libm_dir/$f w_$f
+done
diff --git a/sysdeps/ia64/fpu/lgamma-compat.h b/sysdeps/ia64/fpu/lgamma-compat.h
new file mode 100644
index 0000000000..cac3a485c0
--- /dev/null
+++ b/sysdeps/ia64/fpu/lgamma-compat.h
@@ -0,0 +1,42 @@
+/* ABI compatibility for lgamma functions. ia64 version.
+ Copyright (C) 2015-2024 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#ifndef IA64_LGAMMA_COMPAT_H
+#define IA64_LGAMMA_COMPAT_H 1
+
+#include