=================================================================== RCS file: /home/cvs/OpenXM/doc/ascm2001/homogeneous-network.tex,v retrieving revision 1.1 retrieving revision 1.2 diff -u -p -r1.1 -r1.2 --- OpenXM/doc/ascm2001/homogeneous-network.tex 2001/03/07 02:42:10 1.1 +++ OpenXM/doc/ascm2001/homogeneous-network.tex 2001/03/07 07:17:02 1.2 @@ -1,4 +1,4 @@ -% $OpenXM$ +% $OpenXM: OpenXM/doc/ascm2001/homogeneous-network.tex,v 1.1 2001/03/07 02:42:10 noro Exp $ \subsection{Distributed computation with homogeneous servers} \label{section:homog} @@ -54,10 +54,14 @@ the computational cost and the communication cost for Figure \ref{speedup} shows that the speedup is satisfactory if the degree is large and $L$ is not large, say, up to 10 under the above environment. -If OpenXM provides operations for the broadcast and the reduction +If OpenXM provides collective operations for broadcast and reduction such as {\tt MPI\_Bcast} and {\tt MPI\_Reduce} respectively, the cost of sending $f_1$, $f_2$ and gathering $F_j$ may be reduced to $O(\log_2L)$ -and we can expect better results in such a case. +and we can expect better results in such a case. In order to implement +such operations we need new specifications for inter-sever communication +and the session management. The will be proposed as OpenXM-RFC-102 in future. +We note that preliminary experiments shows the collective operations +works well on OpenXM. \subsubsection{Competitive distributed computation by various strategies} @@ -95,3 +99,133 @@ def dgr(G,V,O,P0,P1) return [Win,R]; } \end{verbatim} + +\subsubsection{Nesting of client-server communication} + +Under OpenXM-RFC-100 an OpenXM server can be a client of other servers. +Figure \ref{tree} illustrates a tree-like structure of an OpenXM +client-server communication. 
+\begin{figure}
+\begin{center}
+\begin{picture}(200,140)(0,0)
+\put(70,120){\framebox(40,15){client}}
+\put(20,60){\framebox(40,15){server}}
+\put(70,60){\framebox(40,15){server}}
+\put(120,60){\framebox(40,15){server}}
+\put(0,0){\framebox(40,15){server}}
+\put(50,0){\framebox(40,15){server}}
+\put(135,0){\framebox(40,15){server}}
+
+\put(90,120){\vector(-1,-1){43}}
+\put(90,120){\vector(0,-1){43}}
+\put(90,120){\vector(1,-1){43}}
+\put(40,60){\vector(-1,-2){22}}
+\put(40,60){\vector(1,-2){22}}
+\put(140,60){\vector(1,-3){14}}
+\end{picture}
+\caption{Tree-like structure of client-server communication}
+\label{tree}
+\end{center}
+\end{figure}
+Such a computational model is useful for parallel implementation of
+algorithms whose task can be divided into subtasks recursively. A
+typical example is {\it quicksort}, where an array to be sorted is
+partitioned into two sub-arrays and the algorithm is applied to each
+sub-array. In each level of recursion, two subtasks are generated
+and one can ask other OpenXM servers to execute them. Though
+this makes little contribution to the efficiency, it is worth
+showing that such an attempt is very easy under OpenXM.
+Here is an Asir program.
+A predefined constant {\tt LevelMax} determines
+whether new servers are launched or whole subtasks are done on the server.
+ +\begin{verbatim} +#define LevelMax 2 +extern Proc1, Proc2; +Proc1 = -1$ Proc2 = -1$ + +/* sort [A[P],...,A[Q]] by quicksort */ +def quickSort(A,P,Q,Level) { + if (Q-P < 1) return A; + Mp = idiv(P+Q,2); M = A[Mp]; B = P; E = Q; + while (1) { + while (A[B] < M) B++; + while (A[E] > M && B <= E) E--; + if (B >= E) break; + else { T = A[B]; A[B] = A[E]; A[E] = T; E--; } + } + if (E < P) E = P; + if (Level < LevelMax) { + /* launch new servers if necessary */ + if (Proc1 == -1) Proc1 = ox_launch(0); + if (Proc2 == -1) Proc2 = ox_launch(0); + /* send the requests to the servers */ + ox_rpc(Proc1,"quickSort",A,P,E,Level+1); + ox_rpc(Proc2,"quickSort",A,E+1,Q,Level+1); + if (E-P < Q-E) { + A1 = ox_pop_local(Proc1); + A2 = ox_pop_local(Proc2); + }else{ + A2 = ox_pop_local(Proc2); + A1 = ox_pop_local(Proc1); + } + for (I=P; I<=E; I++) A[I] = A1[I]; + for (I=E+1; I<=Q; I++) A[I] = A2[I]; + return(A); + }else{ + /* everything is done on this server */ + quickSort(A,P,E,Level+1); + quickSort(A,E+1,Q,Level+1); + return(A); + } +} +\end{verbatim} + +Another example is a parallelization of the Cantor-Zassenhaus +algorithm for polynomial factorization over finite fields. Its +fundamental structure is similar to that of quicksort. By choosing a +random polynomial, a polynomial is divided into two sub-factors with +some probability. Then each subfactor is factorized recursively. In +the following program, one of the two sub-factors generated on a server +is sent to another server and the other subfactor is factorized on the server +itself. 
+\begin{verbatim} +/* factorization of F */ +/* E = degree of irreducible factors in F */ +def c_z(F,E,Level) +{ + V = var(F); N = deg(F,V); + if ( N == E ) return [F]; + M = field_order_ff(); K = idiv(N,E); L = [F]; + while ( 1 ) { + W = monic_randpoly_ff(2*E,V); + T = generic_pwrmod_ff(W,F,idiv(M^E-1,2)); + if ( !(W = T-1) ) continue; + G = ugcd(F,W); + if ( deg(G,V) && deg(G,V) < N ) { + if ( Level >= LevelMax ) { + /* everything is done on this server */ + L1 = c_z(G,E,Level+1); + L2 = c_z(sdiv(F,G),E,Level+1); + } else { + /* launch a server if necessary */ + if ( Proc1 < 0 ) Proc1 = ox_launch(); + /* send a request with Level = Level+1 */ + ox_cmo_rpc(Proc1,"ox_c_z",lmptop(G),E, + setmod_ff(),Level+1); + /* the rest is done on this server */ + L2 = c_z(sdiv(F,G),E,Level+1); + L1 = map(simp_ff,ox_pop_cmo(Proc1)); + } + return append(L1,L2); + } + } +} +\end{verbatim} + + + + + + +