/**
 * @file sphere_part.c
 *
 * @copyright Copyright  (C)  2014 Moritz Hanke <hanke@dkrz.de>
 *                                 Thomas Jahns <jahns@dkrz.de>
 *
 * @version 1.0
 * @author Moritz Hanke <hanke@dkrz.de>
 *         Thomas Jahns <jahns@dkrz.de>
 */
/*
 * Keywords:
 * Maintainer: Moritz Hanke <hanke@dkrz.de>
 *             Rene Redler <rene.redler@mpimet.mpg.de>
 * URL: https://doc.redmine.dkrz.de/YAC/html/index.html
 *
 * This file is part of YAC.
 *
 * YAC is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * YAC is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with YAC.  If not, see <http://www.gnu.org/licenses/gpl.txt>.
 */

#include <assert.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>

#include "sphere_part.h"
#include "geometry.h"
#include "grid.h"
#include "interval_tree.h"
#include "utils.h"
#include "ensure_array_size.h"
#include "grid_search.h"
#include "grid_search_utils.h"

/**
 * storage for the I list of a sphere_part_node
 *
 * Only one member is valid at a time: partition_data sets
 * I_IS_INTERVAL_TREE in the flags of the owning node if it filled the ivt
 * member and leaves the flag unset if it filled the list member.
 */
union I_list {
  // interval tree variant: array of num_nodes interval tree nodes
  struct {
    struct interval_node * head_node;
    size_t num_nodes;
  } ivt;
  // plain variant: array of local cell ids (NULL for an empty I list)
  size_t *list;
};

// Note: this constant is also passed as leaf-size threshold to
// partition_data and partition_point_data by the two constructors.
enum {
   I_list_tree_min_size = 2, //!< make I list into tree when list is
                             //!<  larger than this
};
// per-level state bits used by point_search_NN in its flags stack; a set
// bit makes the traversal skip the respective sub-tree of the node stored
// at that level
enum {
  U_FLAG = 1,
  T_FLAG = 2,
};

/**
 * bits stored in the flags member of sphere_part_node and
 * point_sphere_part_node
 */
enum yac_node_flags {
   // U points to an array of leaf entries instead of a child node
   U_IS_LEAF = 1,
   // T points to an array of leaf entries instead of a child node
   T_IS_LEAF = 2,
   // the I list is stored as interval tree (I.ivt) instead of I.list
   I_IS_INTERVAL_TREE = 4,
};

/**
 * node of the cell based sphere partitioning tree
 *
 * partition_data sorts the cells of a node into three lists based on the
 * great circle of the node: I (bounding circle overlaps with the great
 * circle), U and T (cells on either side of the great circle).
 */
struct sphere_part_node {

   // bitwise combination of enum yac_node_flags values
   int flags;

   // cells overlapping with the great circle (see union I_list)
   union I_list I;
   // if the respective *_IS_LEAF flag is set: array of local cell ids
   // (size_t), otherwise a pointer to a child struct sphere_part_node
   void * U, * T;

   // number of cells that were sorted into the respective list
   size_t I_size, U_size, T_size;

   // largest value of (angle between great circle plane and base point +
   // inc_angle) of all cells in the I list, limited to PI/2
   struct sin_cos_angle I_angle;

   // norm vector of the great circle that partitions the cells of this node
   double gc_norm_vector[3];
};

/**
 * cell search object created by yac_sphere_part_search_new
 *
 * The constructor returns a pointer to this struct cast to
 * struct grid_search *.
 */
struct sphere_part_search {

   // set to &sphere_part_search_vtable by the constructor
   struct grid_search_vtable * vtable;
   // root of the partitioning tree (stored by value)
   struct sphere_part_node base_node;
   // buffer with one entry per grid cell; the leaf lists and the plain
   // I lists of the tree point into this buffer
   size_t * local_cell_ids;
   // grid that was passed to the constructor
   struct grid * grid_data;
};

/**
 * list a cell was assigned to by partition_data
 *
 * The numeric order matters: partition_data sorts its work array by this
 * value and afterwards expects the I entries first, followed by the U and
 * then the T entries.
 */
enum node_type {
  I_NODE = 0,
  U_NODE = 1,
  T_NODE = 2,
};

/**
 * temporary per-cell data used while the cell tree is being built
 */
struct temp_partition_data {
  // index of the cell in the grid (set by yac_sphere_part_search_new)
  size_t local_id;
  // bounding circle of the cell
  struct bounding_circle bnd_circle;
  // enum node_type value; (re)assigned by every partition_data call
  int node_type;
};

// Forward declarations of the routines that are registered in
// sphere_part_search_vtable below. They have internal linkage, so their
// definitions have to be part of this translation unit.
static void sphere_part_do_cell_search(struct grid_search * search,
                                       struct grid * grid_data,
                                       struct dep_list * tgt_to_src_cells);
static void sphere_part_do_cell_search_single(struct grid_search * search,
                                              struct grid_cell cell,
                                              size_t * n_cells,
                                              size_t * cells_size,
                                              size_t ** cells);
static void sphere_part_do_point_search_c(struct grid_search * search,
                                          struct grid * grid_data,
                                          struct dep_list * tgt_to_src_cells);
static void sphere_part_do_point_search_c2(struct grid_search * search,
                                           double (*coordinates_xyz)[3],
                                           size_t num_points,
                                           struct dep_list * tgt_to_src_cells);
static void sphere_part_do_point_search_c3(struct grid_search * search,
                                           double (*coordinates_xyz)[3],
                                           size_t num_points,
                                           struct dep_list * tgt_to_src_cells,
                                           struct points * points);
static void sphere_part_do_point_search_p(struct grid_search * search,
                                          struct grid * grid_data,
                                          struct dep_list * target_to_src_points);
static void sphere_part_do_point_search_p2(struct grid_search * search,
                                           double (*coordinates_xyz)[3],
                                           size_t num_points,
                                           struct dep_list * target_to_src_points);
static void sphere_part_do_point_search_p3(struct grid_search * search,
                                           double (*coordinates_xyz)[3],
                                           size_t num_points,
                                           struct dep_list * target_to_src_points,
                                           struct points * points);
static void sphere_part_do_point_search_p4 (struct grid_search * search,
                                            double coordinate_xyz[3],
                                            size_t * n_points,
                                            size_t * points_size,
                                            size_t ** points);
static void sphere_part_do_bnd_circle_search (struct grid_search * search,
                                              struct bounding_circle * bnd_circles,
                                              size_t n,
                                              struct dep_list * bnd_to_cells);
static void delete_sphere_part_search(struct grid_search * search);

// dispatch table of the sphere part search; yac_sphere_part_search_new
// stores its address in the vtable member of every search object it creates
static struct grid_search_vtable sphere_part_search_vtable =
{
   .do_cell_search        = sphere_part_do_cell_search,
   .do_cell_search_single = sphere_part_do_cell_search_single,
   .do_point_search_c     = sphere_part_do_point_search_c,
   .do_point_search_c2    = sphere_part_do_point_search_c2,
   .do_point_search_c3    = sphere_part_do_point_search_c3,
   .do_point_search_p     = sphere_part_do_point_search_p,
   .do_point_search_p2    = sphere_part_do_point_search_p2,
   .do_point_search_p3    = sphere_part_do_point_search_p3,
   .do_point_search_p4    = sphere_part_do_point_search_p4,
   .do_bnd_circle_search  = sphere_part_do_bnd_circle_search,
   .delete_grid_search    = delete_sphere_part_search
};

/**
 * point stored in the point based sphere partitioning tree
 *
 * yac_point_sphere_part_search_new creates one entry per user point; the
 * leaves of the tree are sub-ranges of that array.
 */
struct point_id_xyz {
  size_t idx; // index of the point in the coordinates array passed to
              // the constructor
  // copy of the 3d coordinates of the point
  double coordinates_xyz[3];
};

/**
 * struct point_id_xyz extended by a cosine value
 *
 * NOTE(review): this type is not used in the visible part of the file;
 * cos_angle presumably holds the cosine of the angle between the point and
 * a reference point -- confirm at the use sites.
 */
struct point_id_xyz_angle {
  size_t idx; // index of the point in the coordinates array passed to
              // the constructor
  double coordinates_xyz[3];
  double cos_angle;
};

/**
 * node of the point based sphere partitioning tree
 *
 * In contrast to sphere_part_node there is no I list: partition_point_data
 * puts points with dot(gc_norm_vector, point) <= 0 into U and all others
 * into T.
 */
struct point_sphere_part_node {

   // bitwise combination of U_IS_LEAF and T_IS_LEAF
   int flags;

   // if the respective *_IS_LEAF flag is set: array of struct point_id_xyz,
   // otherwise a pointer to a child struct point_sphere_part_node
   void * U, * T;

   // number of points that were sorted into the respective list
   size_t U_size, T_size;

   // norm vector of the great circle that partitions the points of this node
   double gc_norm_vector[3];
};

/**
 * point search object created by yac_point_sphere_part_search_new
 */
struct point_sphere_part_search {

   // root of the partitioning tree (stored by value)
   struct point_sphere_part_node base_node;
   // array holding all points; the leaves of the tree point into it
   struct point_id_xyz * points;
   // deepest level reached while building the tree (the root counts as 1)
   size_t max_tree_depth;
};

/**
 * resets a node to the empty state
 *
 * Clears the flags and the sizes of the I, U and T lists and sets the norm
 * vector of the great circle to the z-axis. The I, U and T pointers as well
 * as I_angle are not touched.
 *
 * @param[out] node node to be initialised
 */
static void init_sphere_part_node(struct sphere_part_node * node) {

   double * norm_vector = node->gc_norm_vector;

   norm_vector[0] = 0;
   norm_vector[1] = 0;
   norm_vector[2] = 1;

   node->I_size = node->U_size = node->T_size = 0;
   node->flags = 0;
}

/**
 * allocates a new node and initialises it using init_sphere_part_node
 *
 * The process is aborted if the allocation fails, because neither this
 * routine nor its caller (partition_data) can continue without the node.
 *
 * @return pointer to the new node (never NULL); allocated with malloc
 */
static struct sphere_part_node * get_sphere_part_node(void) {

   struct sphere_part_node * node = malloc(1 * sizeof(*node));

   // init_sphere_part_node writes through the pointer, so a failed
   // allocation has to be caught before it is used
   if (node == NULL) abort();

   init_sphere_part_node(node);

   return node;
}

/**
 * qsort comparison routine that orders temp_partition_data entries by
 * their node_type
 *
 * @param[in] a pointer to the first struct temp_partition_data
 * @param[in] b pointer to the second struct temp_partition_data
 * @return difference of the node types of a and b
 */
static int compare_temp_partition_data(const void * a, const void * b) {

  const struct temp_partition_data * lhs = a;
  const struct temp_partition_data * rhs = b;

  return lhs->node_type - rhs->node_type;
}

/**
 * recursively builds the cell based sphere partitioning tree
 *
 * A great circle through the balance point of all provided cells is
 * computed. Each cell is then assigned to one of three lists of
 * parent_node:
 *   - I: the bounding circle of the cell overlaps with the great circle
 *   - U: remaining cells whose base point has an angle > PI/2 to the norm
 *        vector of the great circle
 *   - T: all other cells
 * U and T lists with more than threshold cells are partitioned again.
 *
 * @param[in]     grid                not read by this routine, only passed
 *                                    on to the recursive calls
 * @param[out]    local_cell_ids      buffer that receives the local ids of
 *                                    the cells; it is advanced by
 *                                    I_size, U_size and T_size and hence
 *                                    needs room for num_cell_ids entries;
 *                                    the lists stored in the tree point
 *                                    into this buffer
 * @param[in,out] part_data           bounding circles and local ids of the
 *                                    cells; gets reordered in place
 * @param[in]     num_cell_ids        number of entries in part_data
 * @param[in]     threshold           maximum size of a U or T leaf
 * @param[in,out] parent_node         node that is filled by this call
 * @param[in]     prev_gc_norm_vector norm vector of the great circle of the
 *                                    previous level
 */
static void partition_data (
   struct grid * grid, size_t * local_cell_ids,
   struct temp_partition_data * part_data, size_t num_cell_ids,
   size_t threshold, struct sphere_part_node * parent_node,
   double prev_gc_norm_vector[]) {

   double balance_point[3] = {0.0,0.0,0.0};

   // compute balance point
   for (size_t i = 0; i < num_cell_ids; ++i) {

     balance_point[0] += part_data[i].bnd_circle.base_vector[0];
     balance_point[1] += part_data[i].bnd_circle.base_vector[1];
     balance_point[2] += part_data[i].bnd_circle.base_vector[2];
   }

   normalise_vector(balance_point);

   // compute the great circle that partitions the data in half (more or less)

   // the new norm vector is perpendicular to the balance point and to the
   // norm vector of the previous level
   crossproduct_ld(
      balance_point, prev_gc_norm_vector, parent_node->gc_norm_vector);
   normalise_vector(parent_node->gc_norm_vector);

   // partition data into cells that overlap with the great circle and cells
   // that are on side of the circle

   size_t I_size = 0;
   size_t U_size = 0;
   size_t T_size = 0;

   struct sin_cos_angle max_inc_angle = SIN_COS_ZERO;

   for (size_t i = 0; i < num_cell_ids; ++i) {

      struct bounding_circle curr_bnd_circle = part_data[i].bnd_circle;

      // get angle between the norm vector of the great circle and the base
      // point of the bounding circle
      struct sin_cos_angle angle =
        get_vector_angle_2(
          curr_bnd_circle.base_vector, parent_node->gc_norm_vector);

      // get the angle between between the plane of the great circle and base
      // point of the bounding circle
      // (sine and cosine are swapped, which corresponds to |PI/2 - angle|)
      struct sin_cos_angle diff_angle_gc =
        sin_cos_angle_new(fabs(angle.cos), angle.sin);

      // if the point intersects with the great circle
      if (compare_angles(diff_angle_gc, curr_bnd_circle.inc_angle) <= 0) {

         // set node type for current cell
         part_data[i].node_type = I_NODE;
         I_size++;

         // distance from the great circle plane to the far edge of the
         // bounding circle; the maximum over all I cells becomes I_angle
         struct sin_cos_angle inc_angle =
            sum_angles_no_check(diff_angle_gc, curr_bnd_circle.inc_angle);

         if (compare_angles(max_inc_angle, inc_angle) < 0)
            max_inc_angle = inc_angle;

      // angle > M_PI_2
      } else if (angle.cos < 0.0) {

         // set node type for current cell
         part_data[i].node_type = U_NODE;
         U_size++;

      } else {

         // set node type for current cell
         part_data[i].node_type = T_NODE;
         T_size++;
      }
   }

   // reorder the cells by node type: first all I cells, then the U cells
   // and finally the T cells (see enum node_type)
   qsort(
      part_data, num_cell_ids, sizeof(*part_data), compare_temp_partition_data);

   parent_node->I_size = I_size;
   parent_node->U_size = U_size;
   parent_node->T_size = T_size;

   // if max_inc_angle > PI/2
   if (compare_angles(max_inc_angle, SIN_COS_M_PI_2) >= 0) {
      parent_node->I_angle = SIN_COS_M_PI_2;
   } else {
      parent_node->I_angle = max_inc_angle;
   }

   if (I_size > 0) {

      // big I lists are stored as interval tree, small ones as plain list
      if (I_size > I_list_tree_min_size) {

         // NOTE(review): this is a compile-time property that is checked at
         // run time; the reason for the requirement is not visible here
         assert(sizeof(struct interval_node) > sizeof(size_t));
         struct interval_node * head_node = malloc(I_size * sizeof(*head_node));
         parent_node->I.ivt.head_node = head_node;
         parent_node->I.ivt.num_nodes = I_size;

         // compute for each I cell the interval of angles, which it covers
         // on the great circle; angles are measured relative to
         // prev_gc_norm_vector
         for (size_t i = 0; i < I_size; ++i) {

            struct sin_cos_angle base_angle, corrected_inc_angle;
            int big_sum, neg;
            double GCp[3], bVp[3];
            struct bounding_circle curr_bnd_circle = part_data[i].bnd_circle;

            // bVp = (n x b) x n: the base vector projected into the plane
            // of the great circle
            crossproduct_ld(parent_node->gc_norm_vector,
                            curr_bnd_circle.base_vector, GCp);
            crossproduct_ld(GCp, parent_node->gc_norm_vector, bVp);
            normalise_vector(bVp);
            base_angle = get_vector_angle_2(bVp, prev_gc_norm_vector);
            // half width of the interval: inc_angle plus the angle between
            // the base vector and its projection
            big_sum =
              sum_angles(curr_bnd_circle.inc_angle,
                         get_vector_angle_2(bVp, curr_bnd_circle.base_vector),
                         &corrected_inc_angle);

            // if the angle is bigger than PI
            if ((big_sum) || (corrected_inc_angle.sin < 0.0))
              corrected_inc_angle = SIN_COS_M_PI;

            struct sin_cos_angle left, right;
            // base_angle - corrected_inc_angle
            neg = sub_angles(base_angle, corrected_inc_angle, &left);
            // base_angle + corrected_inc_angle
            big_sum = sum_angles(base_angle, corrected_inc_angle, &right);

            // shift the borders by 2*PI if the respective routine reported
            // it (presumably an under-/overflow of the angle range --
            // confirm against sub_angles/sum_angles)
            head_node[i].range.left = compute_angle(left) - (neg?2.0*M_PI:0.0);
            head_node[i].range.right = compute_angle(right) +
                                       (big_sum?2.0*M_PI:0.0);
            head_node[i].value = part_data[i].local_id;
         }

         yac_generate_interval_tree(head_node, I_size);
         parent_node->flags |= I_IS_INTERVAL_TREE;
      } else {
         for (size_t i = 0; i < I_size; ++i)
            local_cell_ids[i] = part_data[i].local_id;
         parent_node->I.list = (void*)local_cell_ids;
      }
   } else
      parent_node->I.list = NULL;

   // skip the I cells; local_cell_ids is advanced even if the I list was
   // stored as interval tree and did not use the buffer
   part_data += I_size;
   local_cell_ids += I_size;

   // check whether the lists are small enough (if not -> partition again)
   if (U_size <= threshold) {

      for (size_t i = 0; i < U_size; ++i)
         local_cell_ids[i] = part_data[i].local_id;
      parent_node->U = (void*)local_cell_ids;
      parent_node->flags |= U_IS_LEAF;

   } else {

      parent_node->U = get_sphere_part_node();
      partition_data(grid, local_cell_ids, part_data, U_size, threshold,
                     parent_node->U, parent_node->gc_norm_vector);
   }
   local_cell_ids += U_size;
   part_data += U_size;

   if (T_size <= threshold) {

      for (size_t i = 0; i < T_size; ++i)
         local_cell_ids[i] = part_data[i].local_id;
      parent_node->T = (void*)local_cell_ids;
      parent_node->flags |= T_IS_LEAF;
      // (has no effect, local_cell_ids is not used afterwards)
      local_cell_ids += T_size;

   } else {

      parent_node->T = get_sphere_part_node();
      partition_data(grid, local_cell_ids, part_data, T_size, threshold,
                     parent_node->T, parent_node->gc_norm_vector);
   }
}

static int compare_point_idx_xyz(void const * a, void const * b) {
  return (((struct point_id_xyz *)a)->idx > ((struct point_id_xyz *)b)->idx) -
         (((struct point_id_xyz *)a)->idx < ((struct point_id_xyz *)b)->idx);
}

/**
 * recursively builds the point based sphere partitioning tree
 *
 * A great circle through the balance point of the provided points is
 * computed and the points are reordered in place such that all points with
 * dot(gc_norm_vector, point) <= 0 (U list) come first, followed by the
 * remaining points (T list). Lists with more than threshold points are
 * partitioned again; the points of a leaf are sorted by their idx.
 *
 * @param[in,out] points              points to be partitioned (reordered in
 *                                    place); the leaves of the resulting
 *                                    tree point into this array
 * @param[in]     num_points          number of entries in points; has to be
 *                                    > 0, because a pointer to the last
 *                                    point is computed
 * @param[in]     threshold           maximum number of points in a leaf
 * @param[in]     prev_gc_norm_vector norm vector of the great circle of the
 *                                    previous level
 * @param[in]     curr_tree_depth     depth of the node generated by this call
 * @param[in,out] max_tree_depth      is raised to curr_tree_depth if it is
 *                                    smaller
 * @return node allocated with malloc
 *
 * NOTE(review): the result of malloc is used without a NULL check.
 * NOTE(review): if all points end up in the same list and that list is
 * bigger than threshold, the routine recurses on an unchanged range of
 * points; termination then relies on the great circle of the next level
 * splitting the points -- confirm this for degenerate input (e.g. many
 * identical points).
 */
static struct point_sphere_part_node * partition_point_data (
  struct point_id_xyz * points, size_t num_points, size_t threshold,
  double prev_gc_norm_vector[], size_t curr_tree_depth,
  size_t * max_tree_depth) {

  if (curr_tree_depth > *max_tree_depth) *max_tree_depth = curr_tree_depth;

  struct point_sphere_part_node * node = malloc(1 * sizeof(*node));

  double balance_point[3] = {0.0,0.0,0.0};

  // compute balance point

  for (size_t i = 0; i < num_points; ++i) {

    double * curr_coordinates_xyz = &(points[i].coordinates_xyz[0]);
    balance_point[0] += curr_coordinates_xyz[0];
    balance_point[1] += curr_coordinates_xyz[1];
    balance_point[2] += curr_coordinates_xyz[2];
  }

  normalise_vector(balance_point);

  // compute the great circle that partitions the data in half (more or less)

  // the norm vector is written directly into the new node
  double * gc_norm_vector = &(node->gc_norm_vector[0]);
  crossproduct_ld(balance_point, prev_gc_norm_vector, gc_norm_vector);
  normalise_vector(gc_norm_vector);

  // angle between a point and the great circle plane
  // acos(dot(gc_norm_vector, point_xyz)) = angle(gc_norm_vector, point_xyz)
  // acos(dot(gc_norm_vector, point_xyz)) - PI/2 = angle(gc_plane, point_xyz)
  // dot <= 0.0    -> U list
  // dot >  0.0    -> T list

  struct point_id_xyz * left = points, * right = points + num_points - 1;

  // sort such that all points for the U list come first, followed by the
  // elements of the T list
  // (left and right move towards each other; misplaced pairs are swapped)
  while (1) {
    // find element that does not belong into U-list
    while (left <= right) {
      double * curr_coordinates_xyz = &(left->coordinates_xyz[0]);
      double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                   curr_coordinates_xyz[1] * gc_norm_vector[1] +
                   curr_coordinates_xyz[2] * gc_norm_vector[2];

      // if (angle < M_PI_2)
      if (dot > 0.0) break;
      ++left;
    };

    // find element that does not belong into T-list
    while (left < right) {
      double * curr_coordinates_xyz = &(right->coordinates_xyz[0]);
      double dot = curr_coordinates_xyz[0] * gc_norm_vector[0] +
                   curr_coordinates_xyz[1] * gc_norm_vector[1] +
                   curr_coordinates_xyz[2] * gc_norm_vector[2];

      // if (angle >= M_PI_2)
      if (dot <= 0.0) break;
      --right;
    }

    if (left < right) {
      struct point_id_xyz tmp_point = *left;
      *left = *right;
      *right = tmp_point;
      ++left;
      --right;
    } else {
      break;
    }
  }

  // left now points to the first element of the T list
  size_t U_size = left - points;
  size_t T_size = num_points - U_size;

  node->U_size = U_size;
  node->T_size = T_size;
  node->flags = 0;

  // check whether the lists are small enough (if not -> partition again)
  if (U_size <= threshold) {

    node->U = points;
    node->flags |= U_IS_LEAF;
    qsort(points, U_size, sizeof(*points), compare_point_idx_xyz);

  } else {

    node->U = partition_point_data(points, U_size, threshold, gc_norm_vector,
                                   curr_tree_depth + 1, max_tree_depth);
  }

  if (T_size <= threshold) {

    node->T = points + U_size;
    node->flags |= T_IS_LEAF;
    qsort(points + U_size, T_size, sizeof(*points), compare_point_idx_xyz);

  } else {

    node->T =
      partition_point_data(points + U_size, T_size, threshold, gc_norm_vector,
                           curr_tree_depth + 1, max_tree_depth);
  }

  return node;
}

/**
 * generates a cell search object for a grid that is based on a sphere
 * partitioning tree
 *
 * The bounding circles of all cells of the grid are gathered and passed to
 * partition_data, which builds the tree below search->base_node. The
 * process is aborted if a required allocation fails.
 *
 * @param[in] grid grid for which the search object is built; the pointer
 *                 is stored in the search object
 * @return pointer to the new search object (allocated with malloc)
 */
struct grid_search * yac_sphere_part_search_new (struct grid * grid) {

   struct sphere_part_search * search = malloc(1 * sizeof(*search));

   // the search object is written to right below
   if (search == NULL) abort();

   search->vtable = &sphere_part_search_vtable;
   search->grid_data = grid;

   // start vector for the computation of the first great circle
   double gc_norm_vector[3] = {0.0,0.0,1.0};

   init_sphere_part_node(&(search->base_node));

   size_t num_grid_cells = yac_get_num_grid_cells(grid);
   size_t * local_cell_ids = malloc(num_grid_cells * sizeof(*local_cell_ids));
   struct temp_partition_data * part_data =
      malloc(num_grid_cells * sizeof(*part_data));

   // malloc is allowed to return NULL for a request of size zero, therefore
   // the arrays are only checked if the grid actually contains cells
   if ((num_grid_cells > 0) &&
       ((local_cell_ids == NULL) || (part_data == NULL))) abort();

   // the lists of the tree point into this buffer, hence it is kept in the
   // search object
   search->local_cell_ids = local_cell_ids;

   // get the bounding circles of all cells
   struct grid_cell dummy_cell;
   yac_init_grid_cell(&dummy_cell);
   for (size_t i = 0; i < num_grid_cells; ++i) {
      yac_get_grid_cell2(grid, i, &dummy_cell, &(part_data[i].bnd_circle));
      part_data[i].local_id = i;
   }
   yac_free_grid_cell(&dummy_cell);

   partition_data(grid, local_cell_ids, part_data, num_grid_cells,
                  I_list_tree_min_size, &(search->base_node), gc_norm_vector);

   // part_data is only required while the tree is being built
   free(part_data);

   return (struct grid_search *)search;
}

/**
 * generates a point search object that is based on a sphere partitioning
 * tree
 *
 * The coordinates are copied, so the caller's array is not referenced by
 * the search object. The process is aborted if an allocation fails.
 *
 * @param[in] num_points      number of points
 * @param[in] coordinates_xyz 3d coordinates of the points
 * @return pointer to the new search object (allocated with malloc) or
 *         NULL if num_points is zero
 */
struct point_sphere_part_search * yac_point_sphere_part_search_new (
  size_t num_points, double (*coordinates_xyz)[3]) {

  if (num_points == 0) return NULL;

  struct point_sphere_part_search * search = malloc(1 * sizeof(*search));
  struct point_id_xyz * points = malloc(num_points * sizeof(*points));

  // both buffers are written to unconditionally below
  if ((search == NULL) || (points == NULL)) abort();

  search->points = points;

  // remember the original index of each point, because the points get
  // reordered while the tree is built
  for (size_t i = 0; i < num_points; ++i) {
    points[i].idx = i;
    points[i].coordinates_xyz[0] = coordinates_xyz[i][0];
    points[i].coordinates_xyz[1] = coordinates_xyz[i][1];
    points[i].coordinates_xyz[2] = coordinates_xyz[i][2];
  }

  size_t max_tree_depth = 0;

  // empirical measurements have given a threshold for the leaf size of 2
  struct point_sphere_part_node * tmp_node =
    partition_point_data(
      points, num_points, I_list_tree_min_size, (double[3]){0.0,0.0,1.0},
      1, &max_tree_depth);

  // the base node is stored by value; the node allocated by
  // partition_point_data is not needed afterwards
  search->base_node = *tmp_node;
  search->max_tree_depth = max_tree_depth;
  free(tmp_node);

  return search;
}

/**
 * appends cells from the I list of a node to the list of cells that
 * potentially overlap with a bounding circle
 *
 * If the I list is stored as interval tree, only the cells whose interval
 * overlaps with the interval covered by the bounding circle are appended.
 * Otherwise all cells of the I list are appended.
 *
 * @param[in]     node                        node whose I list is searched
 * @param[in]     bnd_circle                  bounding circle
 * @param[in,out] overlap_cells               array of matching local cell
 *                                            ids (is enlarged if necessary)
 * @param[in,out] overlap_cells_array_size    allocated size of
 *                                            overlap_cells
 * @param[in,out] num_overlap_cells           number of entries in
 *                                            overlap_cells
 * @param[in,out] search_interval_tree_buffer buffer for the results of the
 *                                            interval tree search
 * @param[in]     prev_gc_norm_vector         norm vector of the great
 *                                            circle of the previous level
 *                                            (reference for the angles)
 */
static void search_bnd_circle_I_node(
  struct sphere_part_node * node, struct bounding_circle bnd_circle,
  size_t ** restrict overlap_cells, size_t * overlap_cells_array_size,
  size_t * restrict num_overlap_cells,
  struct overlaps * search_interval_tree_buffer, double prev_gc_norm_vector[]) {

  if (node->flags & I_IS_INTERVAL_TREE) {

     // compute the interval of the bounding circle the same way
     // partition_data did it for the cells of the I list
     struct sin_cos_angle base_angle, corrected_inc_angle;
     int big_sum, neg;
     double GCp[3], bVp[3];
     // bVp: base vector projected into the plane of the great circle
     crossproduct_ld(node->gc_norm_vector,
                     bnd_circle.base_vector, GCp);
     crossproduct_ld(GCp, node->gc_norm_vector, bVp);
     normalise_vector(bVp);
     base_angle = get_vector_angle_2(bVp, prev_gc_norm_vector);
     big_sum =
        sum_angles(
          bnd_circle.inc_angle, get_vector_angle_2(bVp, bnd_circle.base_vector),
          &corrected_inc_angle);

     // if the angle is bigger than PI
     if ((big_sum) || (corrected_inc_angle.sin < 0.0))
       corrected_inc_angle = SIN_COS_M_PI;

     struct sin_cos_angle left, right;
     // base_angle - corrected_inc_angle
     neg = sub_angles(base_angle, corrected_inc_angle, &left);
     // base_angle + corrected_inc_angle
     big_sum = sum_angles(base_angle, corrected_inc_angle, &right);

     search_interval_tree_buffer->num_overlaps = 0;

     yac_search_interval_tree(
        node->I.ivt.head_node, node->I.ivt.num_nodes,
        (struct interval){
           .left = compute_angle(left) - (neg?2.0*M_PI:0.0),
           .right = compute_angle(right) + (big_sum?2.0*M_PI:0.0)},
        search_interval_tree_buffer);

     ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                       *num_overlap_cells +
                       search_interval_tree_buffer->num_overlaps);

     // overlap_iv contains indices into the array of interval nodes; the
     // value of a node is the local id of the respective cell
     for (size_t i = 0; i < search_interval_tree_buffer->num_overlaps;
          ++i) {
        (*overlap_cells)[(*num_overlap_cells)+i] =
           node->I.ivt.head_node[
              search_interval_tree_buffer->overlap_iv[i]].value;
     }

     *num_overlap_cells += search_interval_tree_buffer->num_overlaps;

  } else {

     ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                       *num_overlap_cells + node->I_size);

     // partition_data stores an empty I list as NULL pointer; passing a
     // NULL pointer to memcpy is undefined behaviour even for a size of
     // zero, hence the copy is skipped in this case
     if (node->I_size > 0) {
        memcpy(*overlap_cells + *num_overlap_cells, node->I.list,
               node->I_size * sizeof(**overlap_cells));
        *num_overlap_cells += node->I_size;
     }
  }
}

/**
 * search routine for bounding circles with an inc_angle >= PI/2
 *
 * No sub-tree is excluded: all cells of the T and U sub-trees are appended
 * to overlap_cells (in this order), followed by the matching cells of the
 * I list of the node.
 *
 * @param[in]     node                        current node
 * @param[in]     bnd_circle                  bounding circle
 * @param[in,out] overlap_cells               array of matching local cell
 *                                            ids (is enlarged if necessary)
 * @param[in,out] overlap_cells_array_size    allocated size of
 *                                            overlap_cells
 * @param[in,out] num_overlap_cells           number of entries in
 *                                            overlap_cells
 * @param[in,out] search_interval_tree_buffer buffer for interval tree
 *                                            searches
 * @param[in]     prev_gc_norm_vector         norm vector of the great
 *                                            circle of the previous level
 */
//! TODO change to iterative implementation and allocate overlap_cells first
static void search_big_bnd_circle(
  struct sphere_part_node * node, struct bounding_circle bnd_circle,
  size_t ** restrict overlap_cells, size_t * overlap_cells_array_size,
  size_t * restrict num_overlap_cells,
  struct overlaps * search_interval_tree_buffer, double prev_gc_norm_vector[]) {

  // T sub-tree
  if (!(node->flags & T_IS_LEAF)) {

    search_big_bnd_circle(
       node->T, bnd_circle, overlap_cells, overlap_cells_array_size,
       num_overlap_cells, search_interval_tree_buffer, node->gc_norm_vector);

  } else {

    size_t const num_T_cells = node->T_size;

    ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                      *num_overlap_cells + num_T_cells);
    memcpy(*overlap_cells + *num_overlap_cells, node->T,
           num_T_cells * sizeof(**overlap_cells));
    *num_overlap_cells += num_T_cells;
  }

  // U sub-tree
  if (!(node->flags & U_IS_LEAF)) {

    search_big_bnd_circle(
       node->U, bnd_circle, overlap_cells, overlap_cells_array_size,
       num_overlap_cells, search_interval_tree_buffer, node->gc_norm_vector);

  } else {

    size_t const num_U_cells = node->U_size;

    ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                      *num_overlap_cells + num_U_cells);
    memcpy(*overlap_cells + *num_overlap_cells, node->U,
           num_U_cells * sizeof(**overlap_cells));
    *num_overlap_cells += num_U_cells;
  }

  // cells that overlap with the great circle of this node
  search_bnd_circle_I_node(
     node, bnd_circle, overlap_cells, overlap_cells_array_size,
     num_overlap_cells, search_interval_tree_buffer, prev_gc_norm_vector);
}

/**
 * search routine for bounding circles with an inc_angle < PI/2
 *
 * Based on the position of the bounding circle relative to the great
 * circle of the node, the T sub-tree, the U sub-tree and the I list are
 * searched (in this order) if the bounding circle can reach them.
 *
 * @param[in]     node                        current node
 * @param[in]     bnd_circle                  bounding circle
 * @param[in,out] overlap_cells               array of matching local cell
 *                                            ids (is enlarged if necessary)
 * @param[in,out] overlap_cells_array_size    allocated size of
 *                                            overlap_cells
 * @param[in,out] num_overlap_cells           number of entries in
 *                                            overlap_cells
 * @param[in,out] search_interval_tree_buffer buffer for interval tree
 *                                            searches
 * @param[in]     prev_gc_norm_vector         norm vector of the great
 *                                            circle of the previous level
 */
static void search_small_bnd_circle(
  struct sphere_part_node * node, struct bounding_circle bnd_circle,
  size_t ** restrict overlap_cells, size_t * overlap_cells_array_size,
  size_t * restrict num_overlap_cells,
  struct overlaps * search_interval_tree_buffer, double prev_gc_norm_vector[]) {

   double const * base = bnd_circle.base_vector;
   double const * norm = node->gc_norm_vector;
   double const sin_inc_angle = bnd_circle.inc_angle.sin;

   // cosine of the angle between the base point and the norm vector
   double const dot = base[0] * norm[0] +
                      base[1] * norm[1] +
                      base[2] * norm[2];

   // T side: angle < M_PI_2 + bnd_circle.inc_angle
   if (dot > - sin_inc_angle) {

      if (!(node->flags & T_IS_LEAF)) {

         search_small_bnd_circle(
            node->T, bnd_circle, overlap_cells, overlap_cells_array_size,
            num_overlap_cells, search_interval_tree_buffer,
            node->gc_norm_vector);

      } else {

         size_t const num_T_cells = node->T_size;

         ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                           *num_overlap_cells + num_T_cells);
         memcpy(*overlap_cells + *num_overlap_cells, node->T,
                num_T_cells * sizeof(**overlap_cells));
         *num_overlap_cells += num_T_cells;
      }
   }

   // U side: angle > M_PI_2 - bnd_circle.inc_angle
   if (dot < sin_inc_angle) {

      if (!(node->flags & U_IS_LEAF)) {

         search_small_bnd_circle(
            node->U, bnd_circle, overlap_cells, overlap_cells_array_size,
            num_overlap_cells, search_interval_tree_buffer,
            node->gc_norm_vector);

      } else {

         size_t const num_U_cells = node->U_size;

         ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                           *num_overlap_cells + num_U_cells);
         memcpy(*overlap_cells + *num_overlap_cells, node->U,
                num_U_cells * sizeof(**overlap_cells));
         *num_overlap_cells += num_U_cells;
      }
   }

   struct sin_cos_angle angle_sum =
      sum_angles_no_check(node->I_angle, bnd_circle.inc_angle);

   // The I list has to be searched
   // if (I_angle + inc_angle > PI/2) ||
   //    (fabs(angle - M_PI_2) <= (I_angle + inc_angle))
   //
   // assumption:
   //   I_angle >= 0 && I_angle <= PI/2
   //   inc_angle >= 0 && inc_angle <= PI/2
   //   angle >= 0 && angle <= PI
   //
   //   => I_angle + inc_angle >= 0 && I_angle + inc_angle <= PI
   //
   // first condition: I_angle + inc_angle >= PI/2
   //
   // second condition: fabs(angle - M_PI_2) <= (I_angle + inc_angle)
   // => sin(fabs(angle - M_PI_2)) <= sin(I_angle + inc_angle)
   //    this is wrong for (I_angle + inc_angle) > PI/2, however that case is
   //    already covered by the first condition
   // => fabs(cos(angle)) <= sin(I_angle + inc_angle)
   int const sum_reaches_half_pi =
      (angle_sum.sin < 0.0) || (angle_sum.cos <= 0.0);
   int const within_I_range = fabs(dot) <= angle_sum.sin;

   if (sum_reaches_half_pi || within_I_range) {
      search_bnd_circle_I_node(
         node, bnd_circle, overlap_cells, overlap_cells_array_size,
         num_overlap_cells, search_interval_tree_buffer, prev_gc_norm_vector);
   }
}

/**
 * searches the tree for all cells that potentially overlap with a
 * bounding circle
 *
 * Selects the search routine based on the size of the bounding circle:
 * search_small_bnd_circle if the cosine of inc_angle is positive,
 * search_big_bnd_circle otherwise. All arguments are passed on unchanged.
 */
static void search_bnd_circle(struct sphere_part_node * node,
                              struct bounding_circle bnd_circle,
                              size_t ** restrict overlap_cells,
                              size_t * overlap_cells_array_size,
                              size_t * restrict num_overlap_cells,
                              struct overlaps * search_interval_tree_buffer,
                              double prev_gc_norm_vector[]) {

  // inc_angle is in the range of [0;PI/2[
  if (bnd_circle.inc_angle.cos > 0.0) {
    search_small_bnd_circle(
      node, bnd_circle, overlap_cells, overlap_cells_array_size,
      num_overlap_cells, search_interval_tree_buffer, prev_gc_norm_vector);
    return;
  }

  search_big_bnd_circle(
    node, bnd_circle, overlap_cells, overlap_cells_array_size,
    num_overlap_cells, search_interval_tree_buffer, prev_gc_norm_vector);
}

/**
 * checks the points of a leaf for new nearest neighbours of a search point
 *
 * The results of the current search are stored in the result arrays
 * starting at offset total_num_local_point_ids. A point that is closer
 * than best_angle replaces all results of the current search; a point with
 * the same distance is appended to them.
 *
 * @param[in]     points                    points of the leaf
 * @param[in]     num_points                number of points in the leaf
 * @param[in]     point_coordinates_xyz     coordinates of the search point
 * @param[in,out] best_angle                angle of the best result so far
 * @param[in,out] result_coordinates_xyz    coordinates of the results (no
 *                                          coordinates are stored if NULL)
 * @param[in,out] result_coordinates_xyz_array_size
 *                                          allocated size of
 *                                          *result_coordinates_xyz
 * @param[in,out] local_point_ids           idx of the results
 * @param[in,out] local_point_ids_array_size
 *                                          allocated size of
 *                                          *local_point_ids
 * @param[in]     total_num_local_point_ids offset of the results of the
 *                                          current search in the arrays
 * @param[in,out] num_local_point_ids       number of results of the current
 *                                          search
 */
static inline void check_leaf_NN(
  struct point_id_xyz * points, size_t num_points,
  double * point_coordinates_xyz, struct sin_cos_angle * best_angle,
  double (**result_coordinates_xyz)[3],
  size_t * result_coordinates_xyz_array_size, size_t ** local_point_ids,
  size_t * local_point_ids_array_size, size_t total_num_local_point_ids,
  size_t * num_local_point_ids) {

  int const store_coordinates = result_coordinates_xyz != NULL;
  size_t num_results = *num_local_point_ids;

  // in the worst case all points of the leaf are appended to the results
  size_t const required_size =
    total_num_local_point_ids + num_results + num_points;

  double (*coordinates_out)[3] = NULL;

  if (store_coordinates) {
    double (*coordinates)[3] = *result_coordinates_xyz;
    size_t coordinates_array_size = *result_coordinates_xyz_array_size;
    ENSURE_ARRAY_SIZE(coordinates, coordinates_array_size, required_size);
    *result_coordinates_xyz = coordinates;
    *result_coordinates_xyz_array_size = coordinates_array_size;
    coordinates_out = coordinates + total_num_local_point_ids;
  }

  size_t * ids = *local_point_ids;
  size_t ids_array_size = *local_point_ids_array_size;
  ENSURE_ARRAY_SIZE(ids, ids_array_size, required_size);
  *local_point_ids = ids;
  *local_point_ids_array_size = ids_array_size;
  size_t * ids_out = ids + total_num_local_point_ids;

  // check leaf for results
  for (size_t i = 0; i < num_points; ++i) {

    struct point_id_xyz * curr_point = points + i;

    struct sin_cos_angle curr_angle =
      get_vector_angle_2(curr_point->coordinates_xyz, point_coordinates_xyz);
    int const compare = compare_angles(curr_angle, *best_angle);

    // the point is worse than the currently best point
    if (compare > 0) continue;

    // a better point invalidates all previous results of this search
    if (compare < 0) {
      *best_angle = curr_angle;
      num_results = 0;
    }

    if (store_coordinates) {
      coordinates_out[num_results][0] = curr_point->coordinates_xyz[0];
      coordinates_out[num_results][1] = curr_point->coordinates_xyz[1];
      coordinates_out[num_results][2] = curr_point->coordinates_xyz[2];
    }
    ids_out[num_results] = curr_point->idx;
    ++num_results;
  }

  *num_local_point_ids = num_results;
}

/**
 * iterative nearest neighbour search in the point tree
 *
 * The tree is traversed depth-first without recursion. dot_stack,
 * node_stack and flags serve as stacks, which are indexed by the current
 * tree depth. The search starts at the level curr_tree_depth, whose
 * entries in the three stacks have to be set by the caller, and ends when
 * both sub-trees of the node at level 0 have been handled.
 *
 * @param[in,out] bnd_circle                base_vector is the search point;
 *                                          inc_angle is the angle of the
 *                                          best result so far and is
 *                                          updated on return
 * @param[in,out] result_coordinates_xyz    see check_leaf_NN
 * @param[in,out] result_coordinates_xyz_array_size see check_leaf_NN
 * @param[in,out] local_point_ids           see check_leaf_NN
 * @param[in,out] local_point_ids_array_size see check_leaf_NN
 * @param[in]     total_num_local_point_ids see check_leaf_NN
 * @param[in,out] num_local_point_ids       see check_leaf_NN
 * @param[in,out] dot_stack                 per level: dot product of the
 *                                          search point and the norm vector
 *                                          of the node at that level
 * @param[in,out] node_stack                per level: node at that level
 * @param[in,out] flags                     per level: U_FLAG/T_FLAG bits of
 *                                          the sub-trees that are not to be
 *                                          visited anymore
 * @param[in]     curr_tree_depth           level at which the search starts
 *
 * NOTE(review): the stacks need an entry for every level that can be
 * reached; presumably they are sized based on max_tree_depth of the search
 * object -- confirm at the call sites.
 */
static void point_search_NN(
  struct bounding_circle * bnd_circle, double (**result_coordinates_xyz)[3],
  size_t * result_coordinates_xyz_array_size, size_t ** local_point_ids,
  size_t * local_point_ids_array_size, size_t total_num_local_point_ids,
  size_t * num_local_point_ids, double * dot_stack,
  struct point_sphere_part_node ** node_stack,
  int * flags, size_t curr_tree_depth) {

  double * point_coordinates_xyz = bnd_circle->base_vector;
  struct sin_cos_angle best_angle = bnd_circle->inc_angle;

  // load the state of the start level
  double dot = dot_stack[curr_tree_depth];
  struct point_sphere_part_node * node = node_stack[curr_tree_depth];
  int skip_U = flags[curr_tree_depth] & U_FLAG;
  int skip_T = flags[curr_tree_depth] & T_FLAG;

  do {

    if (!skip_U) {

      // mark the U sub-tree as handled before it is entered
      flags[curr_tree_depth] |= U_FLAG;

      // angle + inc_angle >= M_PI_2
      // (both operands of the bitwise or are 0 or 1, hence the result is
      //  the same as for a logical or)
      if ((dot < best_angle.sin) | (best_angle.cos <= 0.0)) {

        if (node->flags & U_IS_LEAF) {

          check_leaf_NN(
            (struct point_id_xyz *)(node->U), node->U_size,
            point_coordinates_xyz, &best_angle, result_coordinates_xyz,
            result_coordinates_xyz_array_size, local_point_ids,
            local_point_ids_array_size, total_num_local_point_ids,
            num_local_point_ids);

        } else {

          // traverse down one level
          ++curr_tree_depth;
          node = (struct point_sphere_part_node *)(node->U);
          dot = node->gc_norm_vector[0] * point_coordinates_xyz[0] +
                node->gc_norm_vector[1] * point_coordinates_xyz[1] +
                node->gc_norm_vector[2] * point_coordinates_xyz[2];
          dot_stack[curr_tree_depth] = dot;
          node_stack[curr_tree_depth] = node;
          flags[curr_tree_depth] = 0;
          skip_U = 0;
          skip_T = 0;
          // restart the loop with the child node
          continue;
        }
      }
    }

    if (!skip_T) {

      // from here on both sub-trees of this node count as handled
      flags[curr_tree_depth] = U_FLAG + T_FLAG;

      // angle - inc_angle < M_PI_2
      if ((dot > - best_angle.sin) || (best_angle.cos <= 0.0)) {

        if (node->flags & T_IS_LEAF) {

           check_leaf_NN(
             (struct point_id_xyz *)(node->T), node->T_size,
             point_coordinates_xyz, &best_angle, result_coordinates_xyz,
             result_coordinates_xyz_array_size, local_point_ids,
             local_point_ids_array_size, total_num_local_point_ids,
             num_local_point_ids);

        } else {

           // traverse down one level
           ++curr_tree_depth;
           node = (struct point_sphere_part_node *)(node->T);
           dot = node->gc_norm_vector[0] * point_coordinates_xyz[0] +
                 node->gc_norm_vector[1] * point_coordinates_xyz[1] +
                 node->gc_norm_vector[2] * point_coordinates_xyz[2];
           dot_stack[curr_tree_depth] = dot;
           node_stack[curr_tree_depth] = node;
           flags[curr_tree_depth] = 0;
           skip_U = 0;
           skip_T = 0;
           // restart the loop with the child node
           continue;
        }
      }
    }

    // both sub-trees of the current node are done; the search is finished
    // once this is the case for level 0
    if (curr_tree_depth == 0) break;

    // go up one level in the tree

    curr_tree_depth--;
    dot = dot_stack[curr_tree_depth];
    node = node_stack[curr_tree_depth];
    skip_U = flags[curr_tree_depth] & U_FLAG;
    skip_T = flags[curr_tree_depth] & T_FLAG;

  } while (1);

  // return the angle of the best result to the caller
  bnd_circle->inc_angle = best_angle;
}

/**
 * Checks whether the (sub-)tree below a node contains at least one point
 * that lies inside a bounding circle.
 *
 * The dot product between the gc_norm_vector of the node and the base vector
 * of the circle decides which of the two children (U and/or T) have to be
 * looked at. Leaf children are scanned point by point, all other children
 * are handled recursively. The T side is only visited if nothing was found
 * on the U side.
 *
 * @param[in] node       node at which the check starts
 * @param[in] bnd_circle circle given by its base vector and the sin/cos of
 *                       its inc_angle
 * @return non-zero if a point p with (p * base_vector) > inc_angle.cos was
 *         found, 0 otherwise
 */
static int point_check_bnd_circle(
   struct point_sphere_part_node * node, struct bounding_circle bnd_circle) {

   double dot = node->gc_norm_vector[0]*bnd_circle.base_vector[0] +
                node->gc_norm_vector[1]*bnd_circle.base_vector[1] +
                node->gc_norm_vector[2]*bnd_circle.base_vector[2];

   // U side: angle + inc_angle >= M_PI_2
   if (dot <= bnd_circle.inc_angle.sin) {

      if (!(node->flags & U_IS_LEAF)) {

         // inner node -> a hit further down ends the search
         int found_in_U = point_check_bnd_circle(node->U, bnd_circle);
         if (found_in_U) return found_in_U;

      } else {

         struct point_id_xyz * leaf_points = (struct point_id_xyz *)(node->U);
         size_t num_leaf_points = node->U_size;
         for (size_t k = 0; k < num_leaf_points; ++k) {
            double cos_angle =
               leaf_points[k].coordinates_xyz[0] * bnd_circle.base_vector[0] +
               leaf_points[k].coordinates_xyz[1] * bnd_circle.base_vector[1] +
               leaf_points[k].coordinates_xyz[2] * bnd_circle.base_vector[2];
            if (cos_angle > bnd_circle.inc_angle.cos) return 1;
         }
      }
   }

   // T side: angle - inc_angle < M_PI_2
   // (nothing was found on the U side if we get here)
   if (dot > - bnd_circle.inc_angle.sin) {

      if (!(node->flags & T_IS_LEAF))
         return point_check_bnd_circle(node->T, bnd_circle);

      struct point_id_xyz * leaf_points = (struct point_id_xyz *)(node->T);
      size_t num_leaf_points = node->T_size;
      for (size_t k = 0; k < num_leaf_points; ++k) {
         double cos_angle =
            leaf_points[k].coordinates_xyz[0] * bnd_circle.base_vector[0] +
            leaf_points[k].coordinates_xyz[1] * bnd_circle.base_vector[1] +
            leaf_points[k].coordinates_xyz[2] * bnd_circle.base_vector[2];
         if (cos_angle > bnd_circle.inc_angle.cos) return 1;
      }
   }

   return 0;
}

/**
 * Collects all points of a leaf that are nearly identical to a given
 * coordinate (every component differs by less than yac_angle_tol).
 *
 * When the first matching point is found, the output arrays are enlarged
 * such that they can hold this and all remaining points of the leaf. The
 * matches are written behind the first total_num_local_point_ids entries.
 *
 * @param[in]     points                            points of the leaf
 * @param[in]     num_points                        number of points in the leaf
 * @param[in]     coordinate_xyz                    coordinate to be matched
 * @param[in,out] local_point_ids                   array receiving the idx of
 *                                                  each matching point
 * @param[in,out] local_point_ids_array_size        size of local_point_ids
 * @param[in,out] result_coordinates_xyz            array receiving the
 *                                                  coordinates of the matching
 *                                                  points (may be NULL)
 * @param[in,out] result_coordinates_xyz_array_size size of
 *                                                  result_coordinates_xyz
 * @param[in]     total_num_local_point_ids         offset at which the results
 *                                                  are stored
 * @param[out]    num_local_point_ids               number of matches (only
 *                                                  written if there is one)
 * @return 1 if at least one matching point was found, 0 otherwise
 */
static inline int leaf_contains_matching_points(
  struct point_id_xyz * points, size_t num_points, double coordinate_xyz[3],
  size_t ** local_point_ids, size_t * local_point_ids_array_size,
  double (**result_coordinates_xyz)[3],
  size_t * result_coordinates_xyz_array_size,
  size_t total_num_local_point_ids, size_t * num_local_point_ids) {

  size_t num_matches = 0;
  size_t * id_buffer = NULL;
  double (*xyz_buffer)[3] = NULL;

  for (size_t k = 0; k < num_points; ++k) {

    int nearly_identical =
      (fabs(points[k].coordinates_xyz[0] - coordinate_xyz[0]) < yac_angle_tol) &&
      (fabs(points[k].coordinates_xyz[1] - coordinate_xyz[1]) < yac_angle_tol) &&
      (fabs(points[k].coordinates_xyz[2] - coordinate_xyz[2]) < yac_angle_tol);

    if (!nearly_identical) continue;

    // first match -> make room for this and all remaining points of the leaf
    if (num_matches == 0) {

      ENSURE_ARRAY_SIZE(*local_point_ids, *local_point_ids_array_size,
                        total_num_local_point_ids + num_points - k);
      id_buffer = (*local_point_ids) + total_num_local_point_ids;

      if (result_coordinates_xyz != NULL) {
        ENSURE_ARRAY_SIZE(*result_coordinates_xyz,
                          *result_coordinates_xyz_array_size,
                          total_num_local_point_ids + num_points - k);
        xyz_buffer = (*result_coordinates_xyz) + total_num_local_point_ids;
      }
    }

    id_buffer[num_matches] = points[k].idx;
    if (xyz_buffer != NULL) {
      xyz_buffer[num_matches][0] = points[k].coordinates_xyz[0];
      xyz_buffer[num_matches][1] = points[k].coordinates_xyz[1];
      xyz_buffer[num_matches][2] = points[k].coordinates_xyz[2];
    }
    ++num_matches;
  }

  if (num_matches == 0) return 0;

  *num_local_point_ids = num_matches;

  return 1;
}

/**
 * Nearest neighbour search for a list of points.
 *
 * For every search point the tree is descended to a leaf. While descending,
 * the dot product, the node and the taken direction (U_FLAG/T_FLAG) of every
 * level are recorded in dot_stack, node_stack and flags. If the leaf contains
 * points that are nearly identical to the search point, these are the result.
 * Otherwise the leaf is checked by check_leaf_NN and the recorded stacks are
 * handed to point_search_NN, which looks for the best result points.
 *
 * The results of all search points are stored consecutively in the output
 * arrays; num_local_point_ids[i] contains the number of results for point i.
 *
 * @param[in]     search                            point search data (if NULL
 *                                                  no results are generated)
 * @param[in]     num_points                        number of search points
 * @param[in]     coordinates_xyz                   coordinates of the search
 *                                                  points
 * @param[out]    cos_angles                        if not NULL: per search
 *                                                  point 1.0 in case of a
 *                                                  nearly identical match,
 *                                                  otherwise the cos of the
 *                                                  final inc_angle of the
 *                                                  search circle
 * @param[in,out] result_coordinates_xyz            coordinates of the result
 *                                                  points (passed on to the
 *                                                  helper routines)
 * @param[in,out] result_coordinates_xyz_array_size size of
 *                                                  result_coordinates_xyz
 * @param[in,out] local_point_ids                   ids of the result points
 * @param[in,out] local_point_ids_array_size        size of local_point_ids
 * @param[out]    num_local_point_ids               number of results per
 *                                                  search point
 *                                                  (num_points entries)
 */
void yac_point_sphere_part_search_NN(struct point_sphere_part_search * search,
                                     size_t num_points,
                                     double (*coordinates_xyz)[3],
                                     double * cos_angles,
                                     double (**result_coordinates_xyz)[3],
                                     size_t * result_coordinates_xyz_array_size,
                                     size_t ** local_point_ids,
                                     size_t * local_point_ids_array_size,
                                     size_t * num_local_point_ids) {

  // the result counts are zeroed even if there is no search data
  memset(num_local_point_ids, 0, num_points * sizeof(*num_local_point_ids));

  if (search == NULL) return;

  struct point_sphere_part_node * base_node = &(search->base_node);

  // number of results stored so far (offset for the next search point)
  size_t total_num_local_point_ids = 0;

  // per tree level: dot product, node and already handled directions
  double * dot_stack = malloc(search->max_tree_depth * sizeof(*dot_stack));
  struct point_sphere_part_node ** node_stack =
    malloc(search->max_tree_depth * sizeof(*node_stack));
  int * flags = malloc(search->max_tree_depth * sizeof(*flags));

  for (size_t i = 0; i < num_points; ++i) {

    struct point_sphere_part_node * curr_node = base_node;

    double * curr_coordinates_xyz = coordinates_xyz[i];

    size_t curr_tree_depth = 0;
    struct point_id_xyz * points = NULL;
    // NOTE: shadows the parameter num_points; from here on this is the number
    //       of points in the selected leaf
    size_t num_points = 0;

    // get the matching leaf for the current point
    do {

      double dot = curr_node->gc_norm_vector[0]*curr_coordinates_xyz[0] +
                   curr_node->gc_norm_vector[1]*curr_coordinates_xyz[1] +
                   curr_node->gc_norm_vector[2]*curr_coordinates_xyz[2];

      dot_stack[curr_tree_depth] = dot;
      node_stack[curr_tree_depth] = curr_node;
      flags[curr_tree_depth] = 0;

      // angle > M_PI_2
      if (dot < yac_angle_tol) {

        flags[curr_tree_depth] = U_FLAG;

        if (curr_node->flags & U_IS_LEAF) {
          if (curr_node->U_size > 0) {
            points = (struct point_id_xyz*)(curr_node->U);
            num_points = curr_node->U_size;
            break;
          } else {
            // the U leaf is empty -> use the T side instead
            flags[curr_tree_depth] = T_FLAG;
            if (curr_node->flags & T_IS_LEAF) {
              points = (struct point_id_xyz*)(curr_node->T);
              num_points = curr_node->T_size;
              break;
            } else {
              curr_node = curr_node->T;
            }
          }
        } else curr_node = curr_node->U;

      // angle < M_PI_2
      } else if (dot > -yac_angle_tol) {

        flags[curr_tree_depth] = T_FLAG;

        if (curr_node->flags & T_IS_LEAF) {
          if (curr_node->T_size > 0) {
            points = (struct point_id_xyz*)(curr_node->T);
            num_points = curr_node->T_size;
            break;
          } else {
            // the T leaf is empty -> use the U side instead
            flags[curr_tree_depth] = U_FLAG;
            if (curr_node->flags & U_IS_LEAF) {
              points = (struct point_id_xyz*)(curr_node->U);
              num_points = curr_node->U_size;
              break;
            } else {
              curr_node = curr_node->U;
            }
          }
        } else curr_node = curr_node->T;
      }

      curr_tree_depth++;
    } while (1);

    // if we do not have to do a finer search
    if (leaf_contains_matching_points(
          points, num_points, curr_coordinates_xyz, local_point_ids,
          local_point_ids_array_size, result_coordinates_xyz,
          result_coordinates_xyz_array_size, total_num_local_point_ids,
          num_local_point_ids + i)) {

      if (cos_angles != NULL) cos_angles[i] = 1.0;

    } else {

      // search circle around the current point, its inc_angle is updated
      // by check_leaf_NN and point_search_NN
      struct bounding_circle bnd_circle;
      bnd_circle.base_vector[0] = curr_coordinates_xyz[0];
      bnd_circle.base_vector[1] = curr_coordinates_xyz[1];
      bnd_circle.base_vector[2] = curr_coordinates_xyz[2];
      bnd_circle.inc_angle = SIN_COS_M_PI;

      check_leaf_NN(
        points, num_points, curr_coordinates_xyz, &(bnd_circle.inc_angle),
        result_coordinates_xyz, result_coordinates_xyz_array_size,
        local_point_ids, local_point_ids_array_size, total_num_local_point_ids,
        num_local_point_ids + i);

      // get best result points
      point_search_NN(
        &bnd_circle, result_coordinates_xyz, result_coordinates_xyz_array_size,
        local_point_ids, local_point_ids_array_size, total_num_local_point_ids,
        num_local_point_ids + i, dot_stack, node_stack, flags, curr_tree_depth);

      if (cos_angles != NULL) cos_angles[i] = bnd_circle.inc_angle.cos;
    }

    total_num_local_point_ids += num_local_point_ids[i];
  }

  free(flags);
  free(node_stack);
  free(dot_stack);
}

/**
 * Comparison routine (usable with qsort) for struct point_id_xyz_angle.
 *
 * Entries are ordered by descending cos_angle; entries whose cos_angle
 * compares neither smaller nor bigger are ordered by ascending idx.
 *
 * @param[in] a pointer to the first struct point_id_xyz_angle
 * @param[in] b pointer to the second struct point_id_xyz_angle
 * @return negative value if a has to be sorted before b, positive value if
 *         it has to be sorted after b, 0 if both are equivalent
 */
static inline int compare_point_id_xyz_angle(const void * a, const void * b) {

  const struct point_id_xyz_angle * lhs = a;
  const struct point_id_xyz_angle * rhs = b;

  // bigger cos_angle first
  if (lhs->cos_angle < rhs->cos_angle) return 1;
  if (lhs->cos_angle > rhs->cos_angle) return -1;

  // smaller idx first
  if (lhs->idx > rhs->idx) return 1;
  if (lhs->idx < rhs->idx) return -1;

  return 0;
}

/**
 * Generates the initial result list of a n-nearest-neighbour search from the
 * points of a single leaf.
 *
 * All points are copied into the result array together with the dot product
 * (cos_angle) between them and the search point. Afterwards the array is
 * sorted using compare_point_id_xyz_angle.
 *
 * @param[in]     n                       number of points to be searched for
 * @param[in]     points                  points of the leaf
 * @param[in]     num_points              number of points (must be > 0)
 * @param[in]     point_coordinates_xyz   coordinates of the search point
 * @param[in,out] results                 result array (is enlarged if
 *                                        required)
 * @param[in,out] results_array_size      size of the result array
 * @return number of valid results; this is num_points if the leaf does not
 *         contain more than n points, otherwise n plus the number of directly
 *         following entries that have the same cos_angle as the n'th one
 */
static size_t initial_point_bnd_search_NNN(
  size_t n, struct point_id_xyz * points, size_t num_points,
  double * point_coordinates_xyz, struct point_id_xyz_angle ** results,
  size_t * results_array_size) {

  assert(num_points > 0);

  ENSURE_ARRAY_SIZE(*results, *results_array_size, num_points);
  struct point_id_xyz_angle * sorted = *results;

  for (size_t k = 0; k < num_points; ++k) {

    struct point_id_xyz_angle * entry = sorted + k;

    // copy id and coordinates, then add the cos of the angle
    *(struct point_id_xyz*)entry = points[k];
    entry->cos_angle =
      points[k].coordinates_xyz[0] * point_coordinates_xyz[0] +
      points[k].coordinates_xyz[1] * point_coordinates_xyz[1] +
      points[k].coordinates_xyz[2] * point_coordinates_xyz[2];
  }

  qsort(sorted, num_points, sizeof(*sorted), compare_point_id_xyz_angle);

  if (num_points <= n) return num_points;

  // keep all points that are as close as the n'th one
  double nth_cos_angle = sorted[n - 1].cos_angle;
  size_t num_results = n;

  while ((num_results < num_points) &&
         (nth_cos_angle == sorted[num_results].cos_angle)) ++num_results;

  return num_results;
}

static inline struct sin_cos_angle check_leaf_NNN(
  size_t n, double * point_coordinates_xyz,
  struct point_id_xyz * points, size_t num_points,
  struct point_id_xyz_angle ** results, size_t * results_array_size,
  size_t * num_results, struct sin_cos_angle curr_angle) {

  size_t num_results_ = *num_results;
  ENSURE_ARRAY_SIZE(*results, *results_array_size, num_results_ + num_points);
  struct point_id_xyz_angle * results_ = *results;

  int flag = 0;

  double min_cos_angle = results_[num_results_-1].cos_angle;

  // check leaf for results
  for (size_t i = 0; i < num_points; ++i) {

    double curr_cos_angle =
      points[i].coordinates_xyz[0] * point_coordinates_xyz[0] +
      points[i].coordinates_xyz[1] * point_coordinates_xyz[1] +
      points[i].coordinates_xyz[2] * point_coordinates_xyz[2];

    // if the point is worse than the currently best point
    if (curr_cos_angle < min_cos_angle) continue;

    struct point_id_xyz_angle point;
    *(struct point_id_xyz*)(&point) = points[i];
    point.cos_angle = curr_cos_angle;

    // insert point
    size_t j;
    for (j = 0; j < num_results_; ++j) {

      if (compare_point_id_xyz_angle(
            &point, results_ + num_results_ - j - 1) < 0) {
        results_[num_results_ - j] = results_[num_results_ - j - 1];
      } else {
        break;
      }
    }
    results_[num_results_ - j] = point;

    ++num_results_;
    flag = 1;
  }

  if (flag) {

    if (num_results_ > n) {

      size_t new_num_results;
      double min_cos_angle = results_[n - 1].cos_angle;

      for (new_num_results = n;
           (new_num_results < num_results_) &&
           (min_cos_angle == results_[new_num_results].cos_angle);
           ++new_num_results);
      num_results_ = new_num_results;
    }
    *num_results = num_results_;

    return
      get_vector_angle_2(
        results_[num_results_-1].coordinates_xyz, point_coordinates_xyz);
  } else return curr_angle;
}

/**
 * Detailed n-nearest-neighbour search for a single point.
 *
 * Starting at tree level curr_tree_depth, the routine walks through the tree
 * without recursion. For every level dot_stack, node_stack and flags contain
 * the dot product of the search point with the norm vector of the node, the
 * node itself and the children that do not have to be visited (any more)
 * (U_FLAG and/or T_FLAG). Leaves that may contain better points are merged
 * into the result list by check_leaf_NNN, which also provides the updated
 * search angle.
 *
 * @param[in]     n                      number of points to be searched for
 * @param[in]     point_coordinates_xyz  coordinates of the search point
 * @param[in,out] results                sorted result list; has to contain at
 *                                       least one entry at entry
 * @param[in,out] results_array_size     size of the result array
 * @param[in,out] num_results            number of entries in the result list
 * @param[in,out] dot_stack              dot products for levels
 *                                       0..curr_tree_depth
 * @param[in,out] node_stack             nodes for levels 0..curr_tree_depth
 * @param[in,out] flags                  U_FLAG/T_FLAG per level
 * @param[in]     curr_tree_depth        tree level at which the search starts
 */
static void point_search_NNN(
  size_t n, double * point_coordinates_xyz,
  struct point_id_xyz_angle ** results, size_t * results_array_size,
  size_t * num_results, double * dot_stack,
  struct point_sphere_part_node ** node_stack, int * flags,
  size_t curr_tree_depth) {

  // angle between the search point and the last entry of the result list
  struct sin_cos_angle angle =
    get_vector_angle_2(
      (*results)[(*num_results)-1].coordinates_xyz, point_coordinates_xyz);

  // if we have already found at least n exactly matching points
  if ((*num_results >= n) && (angle.sin <= yac_angle_tol)) return;

  // restore the state of the level at which the initial search stopped
  double dot = dot_stack[curr_tree_depth];
  struct point_sphere_part_node * node = node_stack[curr_tree_depth];
  int skip_U = flags[curr_tree_depth] & U_FLAG;
  int skip_T = flags[curr_tree_depth] & T_FLAG;

  do {

    if (!skip_U) {

      // mark the U side of this level as handled
      flags[curr_tree_depth] |= U_FLAG;

      // angle + inc_angle >= M_PI_2
      if ((dot < angle.sin) | (angle.cos <= 0.0)) {

        if (node->flags & U_IS_LEAF) {

          angle = check_leaf_NNN(
            n, point_coordinates_xyz, (struct point_id_xyz *)(node->U),
            node->U_size, results, results_array_size, num_results, angle);

        } else {

          // traverse down one level
          ++curr_tree_depth;
          node = (struct point_sphere_part_node *)(node->U);
          dot = node->gc_norm_vector[0] * point_coordinates_xyz[0] +
                node->gc_norm_vector[1] * point_coordinates_xyz[1] +
                node->gc_norm_vector[2] * point_coordinates_xyz[2];
          dot_stack[curr_tree_depth] = dot;
          node_stack[curr_tree_depth] = node;
          flags[curr_tree_depth] = 0;
          skip_U = 0;
          skip_T = 0;
          continue;
        }
      }
    }

    if (!skip_T) {

      // both sides of this level are handled once the T side is done
      flags[curr_tree_depth] = U_FLAG + T_FLAG;

      // angle - inc_angle < M_PI_2
      if ((dot > - angle.sin) || (angle.cos <= 0.0)) {

        if (node->flags & T_IS_LEAF) {

          angle = check_leaf_NNN(
            n, point_coordinates_xyz, (struct point_id_xyz *)(node->T),
            node->T_size, results, results_array_size, num_results, angle);

        } else {

          // traverse down one level
          ++curr_tree_depth;
          node = (struct point_sphere_part_node *)(node->T);
          dot = node->gc_norm_vector[0] * point_coordinates_xyz[0] +
                node->gc_norm_vector[1] * point_coordinates_xyz[1] +
                node->gc_norm_vector[2] * point_coordinates_xyz[2];
          dot_stack[curr_tree_depth] = dot;
          node_stack[curr_tree_depth] = node;
          flags[curr_tree_depth] = 0;
          skip_U = 0;
          skip_T = 0;
          continue;
        }
      }
    }

    // the root level is finished -> the search is complete
    if (curr_tree_depth == 0) break;

    // go up one level in the tree

    curr_tree_depth--;
    dot = dot_stack[curr_tree_depth];
    node = node_stack[curr_tree_depth];
    skip_U = flags[curr_tree_depth] & U_FLAG;
    skip_T = flags[curr_tree_depth] & T_FLAG;

  } while (1);
}

/**
 * n-nearest-neighbour search for a list of points.
 *
 * For n == 1 the work is delegated to yac_point_sphere_part_search_NN;
 * afterwards cos_angles is expanded such that it contains one entry per
 * result point. For all other n, each search point is handled as follows:
 *  - the tree is descended until a node is reached whose selected side
 *    contains less than n points or is a leaf; the points of this node
 *    side (or of both sides) form the initial result list
 *    (initial_point_bnd_search_NNN)
 *  - point_search_NNN refines this list using the recorded
 *    dot_stack/node_stack/flags
 *  - the results are appended to the output arrays
 *
 * If search is NULL or n is 0, there are no results and all entries of
 * num_local_point_ids are set to 0.
 *
 * @param[in]     search                            point search data
 * @param[in]     num_points                        number of search points
 * @param[in]     coordinates_xyz                   coordinates of the search
 *                                                  points
 * @param[in]     n                                 number of points to be
 *                                                  found per search point
 * @param[in,out] cos_angles                        if not NULL: cos of the
 *                                                  angle between each result
 *                                                  point and its search point
 * @param[in,out] cos_angles_array_size             size of cos_angles
 * @param[in,out] result_coordinates_xyz            if not NULL: coordinates of
 *                                                  the result points
 * @param[in,out] result_coordinates_xyz_array_size size of
 *                                                  result_coordinates_xyz
 * @param[in,out] local_point_ids                   ids of the result points
 * @param[in,out] local_point_ids_array_size        size of local_point_ids
 * @param[out]    num_local_point_ids               number of results per
 *                                                  search point
 *                                                  (num_points entries)
 */
void yac_point_sphere_part_search_NNN(struct point_sphere_part_search * search,
                                      size_t num_points,
                                      double (*coordinates_xyz)[3], size_t n,
                                      double ** cos_angles,
                                      size_t * cos_angles_array_size,
                                      double (**result_coordinates_xyz)[3],
                                      size_t * result_coordinates_xyz_array_size,
                                      size_t ** local_point_ids,
                                      size_t * local_point_ids_array_size,
                                      size_t * num_local_point_ids) {

  if (cos_angles != NULL)
    ENSURE_ARRAY_SIZE(*cos_angles, *cos_angles_array_size, num_points * n);

  if (n == 1) {
    yac_point_sphere_part_search_NN(
      search, num_points, coordinates_xyz, (cos_angles!=NULL)?*cos_angles:NULL,
      result_coordinates_xyz, result_coordinates_xyz_array_size,
      local_point_ids, local_point_ids_array_size, num_local_point_ids);

    size_t total_num_local_points = 0;
    for (size_t i = 0; i < num_points; ++i)
      total_num_local_points += num_local_point_ids[i];

    // the NN search generates one cos_angle per search point
    // -> expand it to one cos_angle per result point
    if ((cos_angles != NULL) && (total_num_local_points > num_points)) {

      ENSURE_ARRAY_SIZE(*cos_angles, *cos_angles_array_size,
                        total_num_local_points);

      // go from back to front in order to not overwrite values that are
      // still required (the loop ends when i wraps around)
      for (size_t i = num_points - 1, offset = total_num_local_points - 1;
           i < num_points; i--) {

        for (size_t j = 0; j < num_local_point_ids[i]; ++j, --offset)
          (*cos_angles)[offset] = (*cos_angles)[i];
      }
    }
    return;
  }

  // without search data or with n == 0 there cannot be any results; the
  // result counts are still initialised (like in the NN search), such that
  // the caller does not read undefined values
  if ((search == NULL) || (n == 0)) {
    memset(num_local_point_ids, 0, num_points * sizeof(*num_local_point_ids));
    return;
  }

  struct point_sphere_part_node * base_node = &(search->base_node);

  // number of results stored so far (offset for the next search point)
  size_t total_num_local_point_ids = 0;

  // per tree level: dot product, node and already handled directions
  double * dot_stack = malloc(search->max_tree_depth * sizeof(*dot_stack));
  struct point_sphere_part_node ** node_stack =
    malloc(search->max_tree_depth * sizeof(*node_stack));
  int * flags = malloc(search->max_tree_depth * sizeof(*flags));

  // sorted result list, reused for all search points
  struct point_id_xyz_angle * results = NULL;
  size_t results_array_size = 0;

  for (size_t i = 0; i < num_points; ++i) {

    struct point_sphere_part_node * curr_node = base_node;

    double * curr_coordinates_xyz = coordinates_xyz[i];

    size_t curr_tree_depth = 0;

    // points of the sub tree that is used for the initial search
    // (the offset into search->points is increased by U_size whenever the
    //  T side of a node is selected)
    struct point_id_xyz * initial_points = search->points;
    size_t num_initial_points = 0;

    // get the matching leaf for the current point
    do {

      double dot = curr_node->gc_norm_vector[0]*curr_coordinates_xyz[0] +
                   curr_node->gc_norm_vector[1]*curr_coordinates_xyz[1] +
                   curr_node->gc_norm_vector[2]*curr_coordinates_xyz[2];

      dot_stack[curr_tree_depth] = dot;
      node_stack[curr_tree_depth] = curr_node;
      flags[curr_tree_depth] = 0;

      // angle >= M_PI_2
      if (dot <= 0.0) {

        if (curr_node->U_size < n) {

          // not enough points on the U side -> use both sides
          flags[curr_tree_depth] = U_FLAG + T_FLAG;
          num_initial_points = curr_node->U_size + curr_node->T_size;
          break;
        } else if (curr_node->flags & U_IS_LEAF) {

          flags[curr_tree_depth] = U_FLAG;
          num_initial_points = curr_node->U_size;
          break;
        } else {

          flags[curr_tree_depth] = U_FLAG;
          curr_node = curr_node->U;
        }

      } else {

        if (curr_node->T_size < n) {

          // not enough points on the T side -> use both sides
          flags[curr_tree_depth] = U_FLAG + T_FLAG;
          num_initial_points = curr_node->U_size + curr_node->T_size;
          break;
        } else if (curr_node->flags & T_IS_LEAF) {

          initial_points += curr_node->U_size;
          flags[curr_tree_depth] = T_FLAG;
          num_initial_points = curr_node->T_size;
          break;
        } else {

          initial_points += curr_node->U_size;
          flags[curr_tree_depth] = T_FLAG;
          curr_node = curr_node->T;
        }
      }

      curr_tree_depth++;
    } while (1);

    assert(num_initial_points > 0);

    size_t num_results =
      initial_point_bnd_search_NNN(
        n, initial_points, num_initial_points, curr_coordinates_xyz,
        &results, &results_array_size);

    // do a detailed search
    point_search_NNN(
      n, curr_coordinates_xyz, &results, &results_array_size, &num_results,
      dot_stack, node_stack, flags, curr_tree_depth);

    // extract the results
    ENSURE_ARRAY_SIZE(*local_point_ids, *local_point_ids_array_size,
                      total_num_local_point_ids + num_results);
    size_t * local_point_ids_ =
      (*local_point_ids) + total_num_local_point_ids;
    double * cos_angles_;
    if (cos_angles != NULL) {
      ENSURE_ARRAY_SIZE(*cos_angles, *cos_angles_array_size,
                        total_num_local_point_ids + num_results);
      cos_angles_ = (*cos_angles) + total_num_local_point_ids;
    } else {
      cos_angles_ = NULL;
    }
    double (*result_coordinates_xyz_)[3];
    if (result_coordinates_xyz != NULL) {
      ENSURE_ARRAY_SIZE(*result_coordinates_xyz,
                        *result_coordinates_xyz_array_size,
                        total_num_local_point_ids + num_results);
      result_coordinates_xyz_ =
        (*result_coordinates_xyz) + total_num_local_point_ids;
    } else {
      result_coordinates_xyz_ = NULL;
    }

    for (size_t j = 0; j < num_results; ++j) {

      local_point_ids_[j] = results[j].idx;
      if (cos_angles_ != NULL) cos_angles_[j] = results[j].cos_angle;
      if (result_coordinates_xyz_ != NULL) {
        result_coordinates_xyz_[j][0] = results[j].coordinates_xyz[0];
        result_coordinates_xyz_[j][1] = results[j].coordinates_xyz[1];
        result_coordinates_xyz_[j][2] = results[j].coordinates_xyz[2];
      }
    }

    num_local_point_ids[i] = num_results;
    total_num_local_point_ids += num_results;
  }

  free(results);
  free(flags);
  free(node_stack);
  free(dot_stack);
}

/**
 * Checks whether a bounding circle contains at least one point of a point
 * search data structure.
 *
 * @param[in] search point search data (may be NULL)
 * @param[in] circle bounding circle to be checked
 * @return 0 if search is NULL, otherwise the result of
 *         point_check_bnd_circle for the base node of the search
 */
int yac_point_sphere_part_search_bnd_circle_contains_points(
  struct point_sphere_part_search * search, struct bounding_circle circle) {

  return
    (search != NULL)?
      point_check_bnd_circle(&(search->base_node), circle):0;
}

static void search_point(struct sphere_part_node * node,
                         double point[],
                         size_t ** overlap_cells,
                         size_t * overlap_cells_array_size,
                         size_t * num_overlap_cells,
                         struct overlaps * search_interval_tree_buffer,
                         double prev_gc_norm_vector[]) {

   double dot = point[0] * node->gc_norm_vector[0] +
                point[1] * node->gc_norm_vector[1] +
                point[2] * node->gc_norm_vector[2];

   // angle < M_PI_2
   if (dot > -yac_angle_tol) {

      if (node->flags & T_IS_LEAF) {

         ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                           *num_overlap_cells + node->T_size);
         memcpy(*overlap_cells + *num_overlap_cells, node->T,
                node->T_size * sizeof(**overlap_cells));
         *num_overlap_cells += node->T_size;

      } else {
         search_point(node->T, point, overlap_cells,
                      overlap_cells_array_size, num_overlap_cells,
                      search_interval_tree_buffer, node->gc_norm_vector);
      }
   }

   // angle > M_PI_2
   if (dot < yac_angle_tol) {

      if (node->flags & U_IS_LEAF) {

         ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                           *num_overlap_cells + node->U_size);
         memcpy(*overlap_cells + *num_overlap_cells, node->U,
                node->U_size * sizeof(**overlap_cells));
         *num_overlap_cells += node->U_size;

      } else {
         search_point(node->U, point, overlap_cells,
                      overlap_cells_array_size, num_overlap_cells,
                      search_interval_tree_buffer, node->gc_norm_vector);
      }
   }

   // fabs(angle - M_PI_2) <= (node->I_angle)
   // fabs(cos(angle)) <= sin(node->I_angle)
   if (fabs(dot) <= node->I_angle.sin) {

      if (node->flags & I_IS_INTERVAL_TREE) {

         double GCp[3], bVp[3];
         crossproduct_ld(node->gc_norm_vector, point, GCp);
         crossproduct_ld(GCp, node->gc_norm_vector, bVp);
         normalise_vector(bVp);
         double base_angle = get_vector_angle(bVp, prev_gc_norm_vector);

         struct interval search_interval =
          {.left=base_angle, .right=base_angle};

         search_interval_tree_buffer->num_overlaps = 0;

         yac_search_interval_tree(node->I.ivt.head_node, node->I.ivt.num_nodes,
                                  search_interval, search_interval_tree_buffer);

         ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                           *num_overlap_cells +
                           search_interval_tree_buffer->num_overlaps);

         for (size_t i = 0; i < search_interval_tree_buffer->num_overlaps;
              ++i) {
            (*overlap_cells)[(*num_overlap_cells)+i] =
               node->I.ivt.head_node[
                  search_interval_tree_buffer->overlap_iv[i]].value;
         }

         *num_overlap_cells += search_interval_tree_buffer->num_overlaps;

      } else {

         ENSURE_ARRAY_SIZE(*overlap_cells, *overlap_cells_array_size,
                           *num_overlap_cells + node->I_size);
         memcpy(*overlap_cells + *num_overlap_cells, node->I.list,
                node->I_size * sizeof(**overlap_cells));
         *num_overlap_cells += node->I_size;
      }
   }
}

/**
 * Searches, for every cell of a grid, all cells of the search grid that
 * overlap with it.
 *
 * For each cell of grid_data, search_bnd_circle provides a list of candidate
 * cells based on the bounding circle of the cell. Each candidate is then
 * checked by yac_check_overlap_cells2. The matches are collected in a
 * dependency list.
 *
 * The arrays num_src_per_tgt_cell and tgt_src_dependencies are allocated
 * here, handed to yac_set_dependencies and not freed by this routine.
 *
 * @param[in]  search           grid search (has to be a sphere part search)
 * @param[in]  grid_data        grid whose cells are searched for
 * @param[out] tgt_to_src_cells dependency list; is initialised and set by
 *                              this routine
 */
static void sphere_part_do_cell_search(struct grid_search * search,
                                       struct grid * grid_data,
                                       struct dep_list * tgt_to_src_cells) {

   struct sphere_part_search * sp_search = (struct sphere_part_search *)search;
   struct sphere_part_node * base_node = &(sp_search->base_node);

   size_t num_cells = (size_t)yac_get_num_grid_cells(grid_data);

   // candidate list, reused for all cells
   size_t * temp_search_results = NULL;
   size_t temp_search_results_array_size = 0;
   size_t num_temp_search_results = 0;

   struct grid_cell cell_a, cell_b;
   struct bounding_circle circle_a, circle_b;

   yac_init_grid_cell(&cell_a);
   yac_init_grid_cell(&cell_b);

   struct overlaps search_interval_tree_buffer = {0, 0, NULL};

   unsigned * num_src_per_tgt_cell =
      calloc(num_cells, sizeof(*num_src_per_tgt_cell));
   unsigned * tgt_src_dependencies = NULL;
   size_t tgt_src_dependencies_size = 0;
   size_t total_num_dependencies = 0;

   for (size_t i = 0; i < num_cells; ++i) {

      yac_get_grid_cell2(grid_data, i, &cell_a, &circle_a);

      num_temp_search_results = 0;

      double gc_norm_vector[3] = {0.0,0.0,1.0};

      // get all candidates for the current cell
      search_bnd_circle(
         base_node, circle_a, &temp_search_results,
         &temp_search_results_array_size, &num_temp_search_results,
         &search_interval_tree_buffer, gc_norm_vector);

      // make room for the case that all candidates match
      ENSURE_ARRAY_SIZE(tgt_src_dependencies, tgt_src_dependencies_size,
                        total_num_dependencies + num_temp_search_results);

      for (size_t j = 0; j < num_temp_search_results; ++j) {

         yac_get_grid_cell2(
            sp_search->grid_data, (unsigned)(temp_search_results[j]),
            &cell_b, &circle_b);

         if (yac_check_overlap_cells2(cell_a, circle_a, cell_b, circle_b)) {

            tgt_src_dependencies[total_num_dependencies++] =
               (unsigned)(temp_search_results[j]);
            num_src_per_tgt_cell[i]++;
         }
      }
   }

   yac_free_grid_cell(&cell_a);
   yac_free_grid_cell(&cell_b);
   free(temp_search_results);
   free(search_interval_tree_buffer.overlap_iv);

   // shrink the dependency array to the number of entries actually used
   // (size of an element, not of the pointer); if shrinking fails, the
   // bigger array remains valid and is used instead
   unsigned * shrunk_dependencies =
      realloc(tgt_src_dependencies,
              total_num_dependencies * sizeof(*tgt_src_dependencies));
   if ((shrunk_dependencies != NULL) || (total_num_dependencies == 0))
      tgt_src_dependencies = shrunk_dependencies;

   yac_init_dep_list(tgt_to_src_cells);
   yac_set_dependencies(tgt_to_src_cells, (unsigned)num_cells,
                        num_src_per_tgt_cell, tgt_src_dependencies);
}

static void sphere_part_do_cell_search_single(struct grid_search * search,
                                              struct grid_cell cell,
                                              size_t * n_cells,
                                              size_t * cells_size,
                                              size_t ** cells) {

   struct sphere_part_search * sp_search = (struct sphere_part_search *)search;
   struct sphere_part_node * base_node = &(sp_search->base_node);

   struct bounding_circle circle_a;

   yac_get_cell_bounding_circle(cell, &circle_a);

   struct overlaps search_interval_tree_buffer = {0, 0, NULL};

   double gc_norm_vector[3] = {0.0,0.0,1.0};

   size_t n_cells_ = 0;

   search_bnd_circle(base_node, circle_a, cells, cells_size, &n_cells_,
                     &search_interval_tree_buffer, gc_norm_vector);

   size_t n_cells_final = 0;

   struct grid_cell cell_b;
   struct bounding_circle circle_b;

   yac_init_grid_cell(&cell_b);

   for (size_t j = 0; j < n_cells_; ++j) {

      yac_get_grid_cell2(sp_search->grid_data, (unsigned)((*cells)[j]),
                         &cell_b, &circle_b);

      if (yac_check_overlap_cells2(cell, circle_a, cell_b, circle_b))
         (*cells)[n_cells_final++] = (*cells)[j];
   }

   *n_cells = n_cells_final;

   yac_free_grid_cell(&cell_b);
   free(search_interval_tree_buffer.overlap_iv);
}

/**
 * Searches, for every corner of a grid, all cells of the search grid that
 * contain it.
 *
 * The lon/lat coordinates of each corner of grid_data are converted to 3d
 * coordinates (LLtoXYZ). search_point provides a list of candidate cells for
 * the resulting point and each candidate is checked by yac_point_in_cell2.
 * The matches are collected in a dependency list.
 *
 * The arrays num_src_per_tgt_corner and tgt_src_dependencies are allocated
 * here, handed to yac_set_dependencies and not freed by this routine.
 *
 * @param[in]  search           grid search (has to be a sphere part search)
 * @param[in]  grid_data        grid whose corners are searched for
 * @param[out] tgt_to_src_cells dependency list; is initialised and set by
 *                              this routine
 */
static void sphere_part_do_point_search_c(struct grid_search * search,
                                          struct grid * grid_data,
                                          struct dep_list * tgt_to_src_cells) {

   struct sphere_part_search * sp_search = (struct sphere_part_search *)search;
   struct sphere_part_node * base_node = &(sp_search->base_node);

   size_t num_corners = (size_t)yac_get_num_grid_corners(grid_data);

   // candidate list, reused for all corners
   size_t * temp_search_results = NULL;
   size_t temp_search_results_array_size = 0;
   size_t num_temp_search_results = 0;

   struct grid_cell cell;
   struct bounding_circle bnd_circle;

   yac_init_grid_cell(&cell);

   struct overlaps search_interval_tree_buffer = {0, 0, NULL};

   unsigned * num_src_per_tgt_corner =
      calloc(num_corners, sizeof(*num_src_per_tgt_corner));
   unsigned * tgt_src_dependencies = NULL;
   size_t tgt_src_dependencies_size = 0;
   size_t total_num_dependencies = 0;

   for (size_t i = 0; i < num_corners; ++i) {
      double point_3d[3];
      LLtoXYZ(yac_get_corner_x_coord(grid_data, i),
              yac_get_corner_y_coord(grid_data, i), point_3d);

      num_temp_search_results = 0;

      double gc_norm_vector[3] = {0.0,0.0,1.0};

      // get all candidates for the current corner
      search_point(base_node, point_3d, &temp_search_results,
                   &temp_search_results_array_size, &num_temp_search_results,
                   &search_interval_tree_buffer, gc_norm_vector);

      // make room for the case that all candidates match
      ENSURE_ARRAY_SIZE(tgt_src_dependencies, tgt_src_dependencies_size,
                        total_num_dependencies + num_temp_search_results);

      for (size_t j = 0; j < num_temp_search_results; ++j) {

         yac_get_grid_cell2(sp_search->grid_data, temp_search_results[j],
                            &cell, &bnd_circle);

         if (yac_point_in_cell2(point_3d, cell, bnd_circle)) {

            tgt_src_dependencies[total_num_dependencies++] =
               (unsigned)(temp_search_results[j]);
            num_src_per_tgt_corner[i]++;
         }
      }
   }

   yac_free_grid_cell(&cell);
   free(temp_search_results);
   free(search_interval_tree_buffer.overlap_iv);

   // shrink the dependency array to the number of entries actually used
   // (size of an element, not of the pointer); if shrinking fails, the
   // bigger array remains valid and is used instead
   unsigned * shrunk_dependencies =
      realloc(tgt_src_dependencies,
              total_num_dependencies * sizeof(*tgt_src_dependencies));
   if ((shrunk_dependencies != NULL) || (total_num_dependencies == 0))
      tgt_src_dependencies = shrunk_dependencies;

   yac_init_dep_list(tgt_to_src_cells);
   yac_set_dependencies(tgt_to_src_cells, (unsigned)num_corners,
                        num_src_per_tgt_corner, tgt_src_dependencies);
}

/**
 * Comparison routine (usable with qsort) for unsigned integers.
 *
 * The result is computed from two comparisons instead of a subtraction,
 * because the difference of two unsigned values converted to int has the
 * wrong sign if the values differ by more than INT_MAX.
 *
 * @param[in] a pointer to the first unsigned value
 * @param[in] b pointer to the second unsigned value
 * @return -1 if *a < *b, 1 if *a > *b, 0 if both are equal
 */
static int compare_uint (const void * a, const void * b) {
  unsigned const lhs = *(unsigned const *)a;
  unsigned const rhs = *(unsigned const *)b;
  return (lhs > rhs) - (lhs < rhs);
}

/**
 * Determines, for each of the given points, the cells of the search grid
 * that contain it.
 *
 * For every point, search_point collects candidate cells starting at the
 * base node of the search; each candidate is then tested with
 * yac_point_in_cell2. The indices of the matching cells of one point are
 * stored contiguously and sorted in ascending order.
 *
 * @param[in]  search           search object; accessed as
 *                              struct sphere_part_search
 * @param[in]  coordinates_xyz  coordinates of the points (three doubles each)
 * @param[in]  num_points       number of entries in coordinates_xyz
 * @param[out] tgt_to_src_cells dependency list; initialised here and set to
 *                              the per-point match counts and the
 *                              concatenated matching cell indices
 *
 * @remark Both result arrays are allocated here and passed on to
 *         yac_set_dependencies; they are not freed in this routine.
 */
static void sphere_part_do_point_search_c2(struct grid_search * search,
                                           double (*coordinates_xyz)[3],
                                           size_t num_points,
                                           struct dep_list * tgt_to_src_cells) {

   struct sphere_part_search * sp_search = (struct sphere_part_search *)search;
   struct sphere_part_node * base_node = &(sp_search->base_node);

   // candidate buffer, reused (and grown by search_point) across all points
   size_t * temp_search_results = NULL;
   size_t temp_search_results_array_size = 0;
   size_t num_temp_search_results = 0;

   struct grid_cell cell;
   struct bounding_circle bnd_circle;

   yac_init_grid_cell(&cell);

   struct overlaps search_interval_tree_buffer = {0, 0, NULL};

   // NOTE(review): the result of this allocation is not checked
   unsigned * num_src_per_tgt_corner =
      calloc(num_points, sizeof(*num_src_per_tgt_corner));
   unsigned * tgt_src_dependencies = NULL;
   size_t tgt_src_dependencies_size = 0;
   size_t total_num_dependencies = 0;

   for (size_t i = 0; i < num_points; ++i) {

      double * curr_coordinates_xyz = coordinates_xyz[i];

      num_temp_search_results = 0;

      double gc_norm_vector[3] = {0.0,0.0,1.0};

      search_point(base_node, curr_coordinates_xyz, &temp_search_results,
                   &temp_search_results_array_size, &num_temp_search_results,
                   &search_interval_tree_buffer, gc_norm_vector);

      // reserve room for the case that every candidate matches
      ENSURE_ARRAY_SIZE(tgt_src_dependencies, tgt_src_dependencies_size,
                        total_num_dependencies + num_temp_search_results);

      size_t num_matches = 0;

      for (size_t j = 0; j < num_temp_search_results; ++j) {

         yac_get_grid_cell2(
            sp_search->grid_data, (unsigned)(temp_search_results[j]),
            &cell, &bnd_circle);

         if (yac_point_in_cell2(curr_coordinates_xyz, cell, bnd_circle)) {

            tgt_src_dependencies[total_num_dependencies + num_matches] =
               (unsigned)(temp_search_results[j]);
            num_matches++;
         }
      }

      // ranges with fewer than two entries are already sorted; skipping them
      // also avoids handing a possibly unallocated array to qsort
      if (num_matches > 1)
         qsort(tgt_src_dependencies + total_num_dependencies, num_matches,
               sizeof(*tgt_src_dependencies), compare_uint);
      num_src_per_tgt_corner[i] = (unsigned)num_matches;
      total_num_dependencies += num_matches;
   }

   yac_free_grid_cell(&cell);
   free(temp_search_results);
   free(search_interval_tree_buffer.overlap_iv);

   // shrink the array to the entries actually in use; the size has to be
   // computed from the element type (not from the pointer type), and a
   // failed or zero-sized realloc must not replace the valid buffer
   if (total_num_dependencies > 0) {
      unsigned * shrunk =
         realloc(tgt_src_dependencies,
                 total_num_dependencies * sizeof(*tgt_src_dependencies));
      if (shrunk != NULL) tgt_src_dependencies = shrunk;
   }

   yac_init_dep_list(tgt_to_src_cells);
   yac_set_dependencies(tgt_to_src_cells, (unsigned)num_points,
                        num_src_per_tgt_corner, tgt_src_dependencies);
}

/**
 * Point search variant "c3" of the sphere part search.
 *
 * Passes all arguments on, unchanged and in the same order, to
 * yac_grid_search_utils_do_point_search_c3.
 */
static void sphere_part_do_point_search_c3(
   struct grid_search * search, double (*coordinates_xyz)[3],
   size_t num_points, struct dep_list * tgt_to_src_cells,
   struct points * points) {

   yac_grid_search_utils_do_point_search_c3(search,
                                            coordinates_xyz,
                                            num_points,
                                            tgt_to_src_cells,
                                            points);
}

/**
 * Point search variant "p" of the sphere part search.
 *
 * Calls yac_grid_search_utils_do_point_search_p with the grid stored in the
 * search object followed by the grid passed by the caller.
 */
static void sphere_part_do_point_search_p(
   struct grid_search * search, struct grid * grid_data,
   struct dep_list * target_to_src_points) {

   // the generic search handle is accessed as a sphere part search
   struct sphere_part_search * self = (struct sphere_part_search *)search;

   yac_grid_search_utils_do_point_search_p(
      search, self->grid_data, grid_data, target_to_src_points);
}

/**
 * Point search variant "p2" of the sphere part search.
 *
 * Calls yac_grid_search_utils_do_point_search_p2 with the grid stored in the
 * search object and the caller's point coordinates.
 */
static void sphere_part_do_point_search_p2(
   struct grid_search * search, double (*coordinates_xyz)[3],
   size_t num_points, struct dep_list * target_to_src_points) {

   // the generic search handle is accessed as a sphere part search
   struct sphere_part_search * self = (struct sphere_part_search *)search;

   yac_grid_search_utils_do_point_search_p2(search,
                                            self->grid_data,
                                            coordinates_xyz,
                                            num_points,
                                            target_to_src_points);
}

/**
 * Point search variant "p3" of the sphere part search.
 *
 * Passes all arguments on, unchanged and in the same order, to
 * yac_grid_search_utils_do_point_search_p3.
 */
static void sphere_part_do_point_search_p3(
   struct grid_search * search, double (*coordinates_xyz)[3],
   size_t num_points, struct dep_list * target_to_src_points,
   struct points * points) {

   yac_grid_search_utils_do_point_search_p3(search,
                                            coordinates_xyz,
                                            num_points,
                                            target_to_src_points,
                                            points);
}


/**
 * Point search variant "p4" of the sphere part search.
 *
 * Calls yac_grid_search_utils_do_point_search_p4 with the grid stored in the
 * search object; the coordinate and the three output arguments are passed
 * through unchanged.
 */
static void sphere_part_do_point_search_p4 (
   struct grid_search * search, double coordinate_xyz[3],
   size_t * n_points, size_t * points_size, size_t ** points) {

   // the generic search handle is accessed as a sphere part search
   struct sphere_part_search * self = (struct sphere_part_search *)search;

   yac_grid_search_utils_do_point_search_p4(search,
                                            self->grid_data,
                                            coordinate_xyz,
                                            n_points,
                                            points_size,
                                            points);
}

/**
 * Determines, for each of the given bounding circles, the cells of the
 * search grid whose bounding circle overlaps with it.
 *
 * For every bounding circle, search_bnd_circle collects candidate cells
 * starting at the base node of the search; a candidate is kept if
 * yac_extents_overlap reports an overlap between the bounding circle of the
 * cell and the current bounding circle. Matches are stored in the order in
 * which the candidates were returned.
 *
 * @param[in]  search          search object; accessed as
 *                             struct sphere_part_search
 * @param[in]  bnd_circles     bounding circles to search for
 * @param[in]  num_bnd_circles number of entries in bnd_circles
 * @param[out] bnd_to_cells    dependency list; initialised here and set to
 *                             the per-circle match counts and the
 *                             concatenated matching cell indices
 *
 * @remark Both result arrays are allocated here and passed on to
 *         yac_set_dependencies; they are not freed in this routine.
 */
static void sphere_part_do_bnd_circle_search (struct grid_search * search,
                                              struct bounding_circle * bnd_circles,
                                              size_t num_bnd_circles,
                                              struct dep_list * bnd_to_cells) {

   struct sphere_part_search * sp_search = (struct sphere_part_search *)search;
   struct sphere_part_node * base_node = &(sp_search->base_node);

   // candidate buffer, reused (and grown by search_bnd_circle) across all
   // bounding circles
   size_t * temp_search_results = NULL;
   size_t temp_search_results_array_size = 0;
   size_t num_temp_search_results = 0;

   struct grid_cell cell;

   yac_init_grid_cell(&cell);

   struct overlaps search_interval_tree_buffer = {0, 0, NULL};

   // NOTE(review): the result of this allocation is not checked
   unsigned * num_cells_per_bnd =
      calloc(num_bnd_circles, sizeof(*num_cells_per_bnd));
   unsigned * bnd_to_cells_dependencies = NULL;
   size_t bnd_to_cells_dependencies_size = 0;
   size_t total_num_dependencies = 0;

   for (size_t i = 0; i < num_bnd_circles; ++i) {

      num_temp_search_results = 0;

      double gc_norm_vector[3] = {0.0,0.0,1.0};

      search_bnd_circle(
         base_node, bnd_circles[i], &temp_search_results,
         &temp_search_results_array_size, &num_temp_search_results,
         &search_interval_tree_buffer, gc_norm_vector);

      // reserve room for the case that every candidate matches
      ENSURE_ARRAY_SIZE(bnd_to_cells_dependencies,
                        bnd_to_cells_dependencies_size,
                        total_num_dependencies + num_temp_search_results);

      for (size_t j = 0; j < num_temp_search_results; ++j) {

         struct bounding_circle cell_bnd_circle;
         yac_get_grid_cell2(
            sp_search->grid_data, (unsigned)(temp_search_results[j]),
            &cell, &cell_bnd_circle);

         // if the bounding circle of the current cell overlaps with the current
         // bounding circle
         if (yac_extents_overlap(&cell_bnd_circle, bnd_circles + i)) {
            bnd_to_cells_dependencies[total_num_dependencies++] =
               (unsigned)(temp_search_results[j]);
            num_cells_per_bnd[i]++;
         }
      }
   }

   yac_free_grid_cell(&cell);
   free(temp_search_results);
   free(search_interval_tree_buffer.overlap_iv);

   // shrink the array to the entries actually in use; the size has to be
   // computed from the element type (not from the pointer type), and a
   // failed or zero-sized realloc must not replace the valid buffer
   if (total_num_dependencies > 0) {
      unsigned * shrunk =
         realloc(bnd_to_cells_dependencies,
                 total_num_dependencies * sizeof(*bnd_to_cells_dependencies));
      if (shrunk != NULL) bnd_to_cells_dependencies = shrunk;
   }

   yac_init_dep_list(bnd_to_cells);
   yac_set_dependencies(bnd_to_cells, (unsigned)num_bnd_circles,
                        num_cells_per_bnd, bnd_to_cells_dependencies);
}

static void free_sphere_part_tree (struct sphere_part_node tree) {

   // free I_list
   if (tree.flags & I_IS_INTERVAL_TREE)
      free(tree.I.ivt.head_node);

   if ((tree.flags & U_IS_LEAF) == 0) {
      free_sphere_part_tree(*(struct sphere_part_node*)(tree.U));
      free(tree.U);
   }

   if ((tree.flags & T_IS_LEAF) == 0) {
      free_sphere_part_tree(*(struct sphere_part_node*)(tree.T));
      free(tree.T);
   }
}

/**
 * Recursively releases the child nodes of a point sphere part node.
 *
 * A U or T branch is descended into and freed only if it is not flagged as
 * a leaf. The node pointed to by tree is itself not freed.
 *
 * @param[in] tree node whose child nodes are to be released
 */
static void free_point_sphere_part_tree (struct point_sphere_part_node * tree) {

   // U branch: an inner node is released after its own subtree
   if (!(tree->flags & U_IS_LEAF)) {
      struct point_sphere_part_node * u_child = tree->U;
      free_point_sphere_part_tree(u_child);
      free(u_child);
   }

   // T branch: handled the same way as the U branch
   if (!(tree->flags & T_IS_LEAF)) {
      struct point_sphere_part_node * t_child = tree->T;
      free_point_sphere_part_tree(t_child);
      free(t_child);
   }
}

/**
 * Destroys a sphere part search.
 *
 * Releases the memory referenced by the base node of the tree, then the
 * local cell id array, and finally the search object itself.
 *
 * @param[in] search search object; accessed as struct sphere_part_search
 */
static void delete_sphere_part_search(struct grid_search * search) {

   struct sphere_part_search * self = (struct sphere_part_search *)search;

   // the base node is embedded in the search object, so only the memory it
   // references is released by the tree routine
   free_sphere_part_tree(self->base_node);

   free(self->local_cell_ids);
   free(self);
}

/**
 * Destroys a point sphere part search.
 *
 * Releases the child nodes of the base node, then the point array, and
 * finally the search object itself. Passing NULL is allowed and does
 * nothing.
 *
 * @param[in] search search object to be destroyed (may be NULL)
 */
void yac_delete_point_sphere_part_search(
   struct point_sphere_part_search * search) {

   if (search != NULL) {

      // the base node is embedded in the search object, so only its
      // children are released by the tree routine
      free_point_sphere_part_tree(&search->base_node);

      free(search->points);
      free(search);
   }
}
