ROOTPWA
mutator.cc
Go to the documentation of this file.
1 
2 //
3 // Copyright 2009 Sebastian Neubert
4 //
5 // This file is part of rootpwa
6 //
7 // rootpwa is free software: you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation, either version 3 of the License, or
10 // (at your option) any later version.
11 //
12 // rootpwa is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with rootpwa. If not, see <http://www.gnu.org/licenses/>.
19 //
21 
22 #include <time.h>
23 #include "TH1I.h"
24 #include <algorithm>
25 #include <assert.h>
26 #include "wset.h"
27 
28 
29 void usage(const char* progname){
30  cerr << "usage: -P <wave pool> -S <seed> <ancestorlist>" << endl;
31  cerr << "[-F <number of fixed waves (first N waves in list)> default: 1 ]" << endl;
32  cerr << "[-E <number of waves to exchange> default: 5 ]" << endl;
33  cerr << "[-D <number of waves to drop> ]" << endl;
34  cerr << "[-A <number of waves to add> ]" << endl;
35  cerr << "[-V <range of number of waves to vay> ]" << endl;
36  cerr << "[-p 1 < selective pressure < 2, default: 1.5 ]" << endl;
37  cerr << "[-c 0 < crossover probability < 1 default> 0.75 ]" << endl;
38  cerr << progname << endl;
39 }
40 
41 
43 //
44 // Main starts here
45 //
47 
48 
49 int
50 main(int argc, char** argv){
51 
52  char* progname=argv[0];
53 
54  vector<TString> inputlistFiles; // ancestor list
55  vector<double> fitness;
56  TString wavepoolFile; // pool of waves to choose from
57  unsigned int Exch=0; // number of waves to exchange
58  unsigned int Drop=0; // number of waves to drop
59  unsigned int Add=0;
60  unsigned int Fix=1;
61  unsigned int Vary=0; // range of waves to add or subtract
62  unsigned int seed=0;
63 
64  double sel=1.5; // selective pressure 1<c<2
65  double pc=0.75; // crossover probability;
66 
67 extern char *optarg;
68  // extern int optind;
69  int c;
70  unsigned int optcount=1;
71  while ((c = getopt(argc, argv, "hE:A:V:D:P:F:S:c:p:")) != -1){
72  optcount+=1;
73  switch (c) {
74  case 'F':
75  Fix= atoi(optarg);
76  //cerr << "Fix=" << Fix << endl;
77  break;
78  case 'E':
79  Exch= atoi(optarg);
80  //cerr << "Exchange=" << Exch << endl;
81  break;
82  case 'D':
83  Drop= atoi(optarg);
84  //cerr << "Drop=" << Drop << endl;
85  break;
86  case 'A':
87  Add= atoi(optarg);
88  //cerr << "Add=" << Add << endl;
89  break;
90  case 'V':
91  Vary= atoi(optarg);
92  //cerr << "Vary=" << Add << endl;
93  break;
94  case 'p':
95  sel = atof(optarg);
96  //cerr << "Vary=" << Add << endl;
97  break;
98  case 'c':
99  pc = atof(optarg);
100  //cerr << "Vary=" << Add << endl;
101  break;
102  case 'P':
103  wavepoolFile=optarg;
104  //cerr << "Pool=" << wavepoolFile << endl;
105  break;
106  case 'S':
107  seed= atoi(optarg);
108  //cerr << "Seed=" << seed << endl;
109  break;
110  case 'h':
111  usage(progname);
112  return 1;
113  }
114  }
115 
116  //cerr << "Optcount=" << optcount << endl;
117 
121 
122 
123  TString ancestorList(argv[optcount]);
124  ifstream ancfile(ancestorList.Data());
125  while(!ancfile.eof()){
126  TString ancestor;
127  double fitn=0;
128  ancfile >> ancestor >> fitn;
129  ancfile.ignore(200,'\n');
130  if(ancestor.Length()<1 || fitn==0)continue;
131  //cerr << ancestor << " : " << fitn << endl;
132  inputlistFiles.push_back(ancestor);
133  fitness.push_back(fitn);
134  }
135 
136 
137  if(inputlistFiles.size()<1){
138  cerr << "No ancestor wavelists given. Exiting." << endl;
139  return 1;
140  }
141  else {
142  cerr << inputlistFiles.size() << " ancestors." << endl;
143  }
144 
145  TString inputlistFile=inputlistFiles[0];
146 
147 
148  // initialize Random number generator
149  cerr << "Seed =" << seed << endl;
150  gRandom->SetSeed(time(NULL)+seed);
151 
152 
153  // open and read input wavelists (taken from TPWALikelyhood)
154  vector<set<wsetentry>*> ancestorlist;
155  unsigned int na=inputlistFiles.size();
156  unsigned int mingenes=1000;
157  //unsigned int mini=0;
158  for(unsigned int ia=0;ia<na;++ia){
159  ancestorlist.push_back(new set<wsetentry>());
160  readWavelist(*(ancestorlist[ia]),inputlistFiles[ia]);
161  unsigned int n=ancestorlist[ia]->size();
162  //cerr << "Ancestor: "<<inputlistFiles[ia]
163  // << " with "<<n<<" genes." << endl;
164  if(n<mingenes){
165  mingenes=n;
166  //mini=ia;
167  }
168  if(n<Fix){
169  cerr << "Ancestor has not enough genes to fix "
170  <<Fix<<". Fixing all" << endl;
171  Fix=n;
172  }
173  }
174 
178 
179 
180  // build selection histogram (probability distribution of ranked chromosomes)
181  assert(na>=2);
182  TH1I hDist("hDist","hDist",na,0,na-1);
183 
184  // we implement a ranked selection here such that:
185  // Pmax= c/na, Pmin=(2-c)/na, c is selective pressure 1<c<2
186 
187  for(unsigned int ia=0;ia<na;++ia){
188  // reverse ranking:
189  double rank=(double)(na-ia)-1.;
190  double npop=(double)na;
191  double npopa=npop-1;
192  double p=1./npop*(2.-sel+2.*(sel-1)*rank/npopa);
193  //cerr << "Ancestor " << ia << " P="<<p<< endl;
194  hDist.Fill(ia,p*npop);
195  }
196 
197  // Choose two ancestors
198  unsigned int x1=(unsigned int)hDist.GetRandom();
199  unsigned int x2=(unsigned int)hDist.GetRandom();
200 
201  assert(x1>=0 && x1<na);
202  assert(x2>=0 && x2<na);
203 
204  cerr << "Selective Pressure: " << sel << endl;
205  cerr << "Choosing ancestors: " << x1 << " and " << x2 << endl;
206 
207  set<wsetentry>* ancestor1=ancestorlist[x1];
208  set<wsetentry>* ancestor2=ancestorlist[x2];
209 
210 
211 
212 
216 
217  cerr << "Crossover probability: "<< pc << endl;
218 
219  // copy ancestor1
220  set<wsetentry> motherlist;
221  copy(ancestor1->begin(),ancestor1->end(),inserter(motherlist,motherlist.begin()));
222 
223  if(gRandom->Uniform()<pc){
224 
225  cerr << " crossing over ... ";
226  // get two points in the genome
227  set<wsetentry>* smallancestor=NULL;
228  //set<wsetentry>* largeancestor=NULL;
229 
230  if(ancestor1->size()<=ancestor2->size()){
231  smallancestor = ancestor1;
232  //largeancestor = ancestor2;
233  }
234  else {
235  smallancestor = ancestor2;
236  //largeancestor = ancestor1;
237  }
238 
239 
240 
241  unsigned int cp1=gRandom->Integer(smallancestor->size()-Fix)+Fix;
242  unsigned int cp2=gRandom->Integer(smallancestor->size()-Fix)+Fix;
243 
244  unsigned int cpp1 = cp1; // first point which is swapped
245  unsigned int cpp2 = cp2; // second point ...
246  if(cp1>cp2){
247  cpp1 = cp2;
248  cpp2 = cp1;
249  }
250 
251  cerr << " [" << cpp1 << "..." << cpp2 << "]" << endl;
252  // move to cpp1
253  set<wsetentry>::iterator ita1=motherlist.begin();
254  set<wsetentry>::iterator ita2=ancestor2->begin();
255  for(unsigned int ip=0; ip<cpp1; ++ip){
256  ++ita1;++ita2;
257  }
258  // now see if we can exchange genes
259  // (we only exchange if the mother would receive a new gene!
260  for(unsigned int ip=cpp1; ip<cpp2; ++ip){
261  wsetentry gene=*ita2;
262  if(findByName(motherlist,gene)==motherlist.end()){
263  cerr << "crossing gene " << ip << ": "
264  << gene.name << endl;
265  motherlist.erase(*ita1);
266  motherlist.insert(gene);
267  ita1=find(motherlist.begin(),motherlist.end(),gene);
268  }
269  ++ita1;++ita2;
270  }
271  }
272 
276 
277  set<wsetentry> wavepool;
278  readWavelist(wavepool,wavepoolFile);
279 
280  // calculate dimension:
281  unsigned int nwaves=motherlist.size();
282 
283 
284  set<wsetentry> redpool; // reduced_wavepool=wavepool\motherlist
285  //for(unsigned int i=0; i<nwaves;++i)cout<<motherlist[i].first<<endl;
286  set<wsetentry>::iterator it1=wavepool.begin();
287  while(it1!=wavepool.end()){
288  if(find(motherlist.begin(),motherlist.end(),*it1)==motherlist.end()){
289  redpool.insert(*it1);
290  }
291  ++it1;
292  }
293 
294  // we have taken out all waves from the mother list
295  unsigned int npool=redpool.size();
296  cerr << wavepool.size() << " waves in pool." << endl;
297  cerr << nwaves << " waves in ancestor." << endl;
298  cerr << npool << " waves in reduced pool." << endl;
299 
300  set<wsetentry> mutantlist(motherlist);
301 
302  cerr << "Add =" << Add << endl;
303  cerr << "Drop=" << Drop << endl;
304  cerr << "Exch=" << Exch << endl;
305 
306  // draw vary value
307  if(Vary>0){
308  int v= gRandom->Integer(2*Vary)-Vary;
309  cerr << "Var ="<<v<< endl;
310  if(v>0)Add+=v;
311  if(v<0){
312  // frist reduce added
313  if((int)Add<-v){
314  Drop+=-Add-v;
315  Add=0;
316  }
317  else Add+=v;
318  }
319  }
320 
321  cerr << "Add =" << Add << endl;
322  cerr << "Drop=" << Drop << endl;
323 
324  unsigned int exchangable=mutantlist.size()-Fix;
325  if(Exch+Drop>exchangable){
326  Drop=exchangable-Exch;
327  cerr << "Adjusting drop to maximal value "<<Drop<<endl;
328  }
329 
330  // Step1: Remove waves. Prepare for exchange
331  for(unsigned int ie=0;ie<Exch+Drop;++ie){
332  // choose one wave to remove
333  mutantlist.erase(randomEntry(mutantlist,Fix));
334  }
335 
336  cerr << "removed " << Exch+Drop << " waves" << endl;
337 
338  // Setp2: Exchange and add
339  for(unsigned int ie=0;ie<Exch+Add;++ie){
340  set<wsetentry>::iterator it=randomEntry(redpool);
341  wsetentry anentry=*it;
342  anentry.index=nwaves+ie;
343  mutantlist.insert(anentry);
344  redpool.erase(it); // remove from pool to avoid double waves
345  }
346 
347  cerr << "added " << Exch+Add << " waves" << endl;
348 
349  // output
350  set<wsetentry>::iterator it=mutantlist.begin();
351  int count=0;
352  while(it!=mutantlist.end()){
353  // make sure that there are no double entries
354  set<wsetentry>::iterator it2=it;
355  ++it2;
356  if(find(it2,mutantlist.end(),*it)!=mutantlist.end()){
357  cerr << "Double entry " << it->name << endl;
358  ++it;
359  continue;
360  }
361  cout << it->name;
362  if((it->threshold)!=0) cout << " " << it->threshold;
363  cout<<endl;
364  ++count;
365  ++it;
366  }
367  cerr << count << " waves in mutant." << endl;
368  return 0;
369 }