1 module ggplotd.stat; 2 3 version( unittest ) 4 { 5 import dunit.toolkit; 6 } 7 8 import std.traits : isCallable; 9 10 /** 11 Create Aes based on a function 12 13 Params: 14 func = Function 15 min = x coordinate to start from 16 max = x coordinate to end at 17 precision = Number of points to calculate 18 19 Examples: 20 -------------- 21 /// http://blackedder.github.io/ggplotd/images/function.png 22 import std.random : uniform; 23 import std.typecons : Tuple; import ggplotd.stat : statFunction; 24 import ggplotd.ggplotd : GGPlotD; 25 import ggplotd.geom : geomLine, geomPoint; 26 import ggplotd.range : mergeRange; 27 28 auto f = (double x) { return x / (1 + x); }; 29 30 auto aes = statFunction(f, 0.0, 10); 31 auto gg = GGPlotD().put(geomLine(aes)); 32 33 // Generate some noisy points 34 auto f2 = (double x) { return x / (1 + x) * uniform(0.75, 1.25); }; 35 auto aes2 = f2.statFunction(0.0, 10, 25); 36 37 // Show points in different colour 38 auto withColour = Tuple!(string, "colour")("aquamarine").mergeRange(aes2); 39 gg = gg.put(withColour.geomPoint); 40 41 gg.save("function.png"); 42 -------------- 43 44 */ 45 auto statFunction( FUNC, T )( FUNC func, T min, 46 T max, size_t precision = 50 ) if (isCallable!FUNC) 47 { 48 import std.array : array; 49 import std.algorithm : map; 50 import std.range : iota; 51 52 import ggplotd.aes : Aes; 53 auto stepSize = (max-min)/(precision-1); 54 auto xs = [min]; 55 if (stepSize > 0) 56 xs = iota( min, max, stepSize ).array ~ max; 57 auto ys = xs.map!((x) => func(x)); 58 59 return Aes!(typeof(xs), "x", typeof(ys), "y")( xs, ys ); 60 } 61 62 unittest 63 { 64 import std.array : array; 65 import std.algorithm : map; 66 67 auto aes = statFunction( (double x) { return 2*x; }, 0.0, 1, 2 ); 68 assertEqual( aes.map!("a.x").array, [0.0, 1.0] ); 69 assertEqual( aes.map!("a.y").array, [0.0, 2.0] ); 70 71 aes = statFunction( (double x) { return 3*x; }, -1.0, 1, 3 ); 72 assertEqual( aes.map!("a.x").array, [-1.0, 0.0, 1.0] ); 73 assertEqual( aes.map!("a.y").array, [-3.0, 0.0, 3.0] ); 74 } 75 76 /+++++++++++ 77 Binning 78 +++++++++++/ 79 80 private auto binID(T)( T value, T min, T max, T width ) 81 { 82 import std.conv : to; 83 import std.math : floor; 84 assert( min != max, "Minimum value of bin should differ from maximum" ); 85 assert( width != 0, "Bin width can't be 0" ); 86 87 import std.stdio : writeln; 88 if ( !(value <= max && value >= min) ) 89 writeln( value, " ", min, " ", max ); 90 91 assert( value <= max && value >= min, "Value must be within given range" ); 92 if (value==max) // Corner case for highest value 93 value -= 0.1*width; // Might not work correctly for integers 94 auto id = floor((value - min) / width); 95 return id.to!int; 96 } 97 98 unittest 99 { 100 import std.algorithm : map; 101 import std.array : array; 102 assertEqual( 103 [0.1,0.3,0.6,0.2,0.4].map!((a) => a.binID( 0, 0.6, 0.2 )).array, 104 [0,1,2,1,2] 105 ); 106 // TODO: add tests for integers etc. 107 } 108 109 private auto idToRange(T)( size_t value, T min, T width ) 110 { 111 auto base = min + value*width; 112 return [ base, base + width ]; 113 } 114 115 // Multidimensional bin. Data either is range of range, with each subrange 116 // a set of data. TODO: should we treat one dimensional data differently? 117 private template bin(DATA) 118 { 119 struct Bin 120 { 121 double[][] range; 122 size_t count; 123 } 124 125 auto bin(DATA data, double[] mins, double[] maxs, 126 size_t[] noBins) 127 { 128 import std.range : zip; 129 import std.algorithm : filter, all, group, map; 130 import ggplotd.range : groupBy; 131 assert( noBins.all!((a) => a > 0), "noBins must be larger than 0" ); 132 133 auto widths = zip(mins, maxs, noBins).map!((t) => (t[1]-t[0])/(t[2])); 134 135 auto binIDs = data 136 .filter!((sample) 137 { 138 return zip(sample, mins, maxs). 139 all!( (args) => (args[0] >= args[1] && args[0] <= args[2])); 140 }) 141 .map!((sample) 142 { 143 import std.array : array; // Needed for groupBy to work correctly 144 return zip(sample, mins, maxs, widths) 145 .map!( (args) => binID( args[0], args[1], args[2], args[3] ) ).array; 146 } ); 147 148 import std.typecons : tuple; 149 150 return binIDs.groupBy!((a) => tuple(a)).values.map!((g) 151 { 152 import std.array : array; 153 Bin bin; 154 bin.count = g.length; 155 bin.range = zip( g[0], mins, widths ) 156 .map!((args) => idToRange( args[0], args[1], args[2] ) ) 157 .array; 158 return bin; 159 }); 160 } 161 } 162 163 unittest { 164 import std.algorithm : reduce; 165 auto bins = bin( [[0.0],[0.1],[0.2],[0.2],[0.01],[0.3]], [0.0], [0.3], [10] ); 166 assertEqual( 6, reduce!((a,b) => a += b.count )( 0, bins ) ); 167 auto bins2 = bin( [[0.0,100],[0.1,101],[0.2,109],[0.01,110],[0.3,103.1]], [0.0,100], [0.3,110], [10,10] ); 168 assertEqual( 5, reduce!((a,b) => a += b.count )( 0, bins2 ) ); 169 } 170 171 private auto bin(DATA)(DATA data, double min, double max, 172 size_t noBins) 173 { 174 import std.algorithm : map; 175 return bin( data.map!((d) => [d]), [min], [max], [noBins] ); 176 } 177 178 unittest { 179 import std.algorithm : reduce; 180 auto bins = bin( [0.0,0.1,0.2,0.2,0.01,0.3], 0.0, 0.3, 10 ); 181 assertEqual( 6, reduce!((a,b) => a += b.count )( 0, bins ) ); 182 183 import std.algorithm : map; 184 import std.array : array; 185 import std.random : uniform; 186 import std.range : iota, walkLength; 187 auto xs = iota(0,10,1).map!((i)=>uniform(0,0.75)+uniform(0.25,1)).array; 188 auto binsR = bin( xs, 0.0, 2.0, 5 ); 189 assertEqual( xs.walkLength, reduce!((a,b) => a += b.count )( 0, binsR ) ); 190 191 binsR = bin( xs, 0.0, 1.0, 5 ); 192 assertLessThanOrEqual( binsR.walkLength, 5 ); 193 } 194 195 private template statHistND(int dim, AES) 196 { 197 import std.algorithm : map; 198 import ggplotd.aes : Aes; 199 import ggplotd.range : mergeRange; 200 201 struct VolderMort(AESV) 202 { 203 private import std.range : ElementType; 204 private import std.typecons : Tuple; 205 private import ggplotd.aes : group; 206 207 typeof(group(AESV.init)) grouped; 208 ElementType!(ElementType!(typeof(grouped))) defaults; 209 typeof(bin!(double[][])((double[][]).init, (double[]).init, 210 (double[]).init, (size_t[]).init)) 211 binned; 212 213 size_t[] _noBins; 214 double[] mins; 215 double[] maxs; 216 217 this( AESV aes, size_t[] noBins ) 218 { 219 import std.algorithm : min, max, reduce; 220 import std.array : array; 221 import std.conv : to; 222 import std.range : empty, popFront, front, take, walkLength; 223 import std.typecons : tuple; 224 225 import ggplotd.algorithm : safeMin, safeMax; 226 import ggplotd.range : uniquer; 227 228 static assert( dim == 1 || dim == 2, "Only dimension of 1 or 2 i supported" ); 229 230 _noBins = noBins; 231 static if (dim == 1) { 232 if (_noBins[0] < 1) 233 _noBins[0] = min(aes.map!((a) => a.x.to!double) 234 .uniquer.take(30).walkLength, 235 min(30,max(11, aes.walkLength/10))); 236 auto seed = tuple( 237 aes.front.x.to!double, aes.front.x.to!double ); 238 auto minmax = reduce!((a,b) => safeMin(a,b.x.to!double), 239 (a,b) => safeMax(a,b.x.to!double))(seed,aes); 240 mins = [minmax[0]]; 241 maxs = [minmax[1]]; 242 } 243 else 244 { 245 if (_noBins[0] < 1) 246 _noBins[0] = min(aes.map!((a) => a.x.to!double) 247 .uniquer.take(30).walkLength, 248 min(30,max(11, aes.walkLength/25))); 249 if (_noBins[1] < 1) 250 _noBins[1] = min(aes.map!((a) => a.y.to!double) 251 .uniquer.take(30).walkLength, 252 min(30,max(11, aes.walkLength/25))); 253 auto seed = tuple( 254 aes.front.x.to!double, aes.front.x.to!double, 255 aes.front.y.to!double, aes.front.y.to!double ); 256 auto minmax = reduce!( 257 (a,b) => safeMin(a,b.x.to!double), 258 (a,b) => safeMax(a,b.x.to!double), 259 (a,b) => safeMin(a,b.y.to!double), 260 (a,b) => safeMax(a,b.y.to!double) 261 )(seed,aes); 262 mins = [minmax[0],minmax[2]]; 263 maxs = [minmax[1],minmax[3]]; 264 } 265 266 grouped = group(aes); 267 268 foreach( i; 0..mins.length ) { 269 if (mins[i] == maxs[i]) { 270 mins[i] -= 0.5; 271 maxs[i] += 0.5; 272 } 273 } 274 275 defaults = grouped.front.front; 276 static if (dim == 1) 277 auto data = grouped.front.map!((t) => [t.x.to!double]); // Extract the x coordinates 278 else 279 auto data = grouped.front.map!((t) => [t.x.to!double,t.y.to!double]); // Extract the x coordinates 280 binned = bin( 281 data.array, 282 mins, maxs, 283 _noBins); 284 assert( !grouped.empty, "Groups should not be empty" ); 285 grouped.popFront; 286 } 287 288 @property bool empty() { 289 import std.range : empty; 290 return (grouped.empty && binned.empty); } 291 292 @property auto front() 293 { 294 import ggplotd.aes : merge; 295 import std.conv : to; 296 static if (dim == 1) 297 { 298 auto w = binned.front.range[0][1]-binned.front.range[0][0]; 299 return defaults.merge( 300 Tuple!(double, "x", double, "y", double, "width", double, "height")( 301 binned.front.range[0][0] + 0.5*w, 0.5*binned.front.count, 302 w, binned.front.count) ); 303 } 304 else 305 { 306 // Returning polygons. In theory could also return rectangle, but their lines 307 // will overlap which will make it look wrong. Ideally would be able to set 308 // linewidth to 0 for the rectangles and that would solve it. 309 return [ 310 defaults.merge( 311 Tuple!(double, "x", double, "y", double, "colour")( 312 binned.front.range[0][0], binned.front.range[1][0], 313 binned.front.count.to!double)), 314 315 defaults.merge( 316 Tuple!(double, "x", double, "y", double, "colour")( 317 binned.front.range[0][1], binned.front.range[1][0], 318 binned.front.count.to!double)), 319 320 defaults.merge( 321 Tuple!(double, "x", double, "y", double, "colour")( 322 binned.front.range[0][1], binned.front.range[1][1], 323 binned.front.count.to!double)), 324 325 defaults.merge( 326 Tuple!(double, "x", double, "y", double, "colour")( 327 binned.front.range[0][0], binned.front.range[1][1], 328 binned.front.count.to!double)), 329 330 ]; 331 } 332 } 333 334 void popFront() 335 { 336 import std.array : array; 337 import std.conv : to; 338 import std.range : empty, front, popFront; 339 if (!binned.empty) 340 binned.popFront; 341 if (binned.empty && !grouped.empty) 342 { 343 defaults = grouped.front.front; 344 static if (dim == 1) // Extract the coordinates 345 auto data = grouped.front.map!((t) => [t.x.to!double]); 346 else 347 auto data = grouped.front.map!((t) => [t.x.to!double,t.y.to!double]); 348 binned = bin(data.array, mins, maxs, _noBins); 349 grouped.popFront; 350 } 351 } 352 } 353 354 auto statHistND(AES aesRange, size_t[] noBins) 355 { 356 // Get maxs, mins and noBins 357 return VolderMort!(typeof(aesRange))( aesRange, noBins ); 358 } 359 } 360 361 /** 362 Create Aes that specifies the bins to draw an histogram 363 364 Params: 365 aesRaw = Data that the histogram will be based on 366 noBins = Optional number of bins. On a value of 0 the number of bins will be chosen automatically. 367 368 Returns: Range that holds rectangles representing different bins 369 */ 370 auto statHist(AES)(AES aesRaw, size_t noBins = 0) 371 { 372 return statHistND!(1,AES)( aesRaw, [noBins] ); 373 } 374 375 unittest 376 { 377 import std.algorithm : each, map, sort; 378 import std.array : array; 379 import std.random : uniform; 380 import std.range : iota, walkLength; 381 382 import ggplotd.aes : Aes; 383 384 auto xs = iota(0,100,1).map!((a) => uniform(0,6) ).array; 385 auto sh = statHist( Aes!(int[], "x")(xs) ); 386 auto binXs = sh.map!((b) => b.x).array.sort().array; 387 assertEqual( binXs.length, 6 ); 388 389 // Test single point 390 xs = [1]; 391 sh = statHist( Aes!(int[], "x")(xs) ); 392 assertEqual(sh.walkLength, 1); 393 } 394 395 /** 396 Create Aes that specifies the bins to draw an histogram 397 398 Params: 399 aesRange = Data that the histogram will be based on 400 noBinsX = Optional number of bins for x axis. On a value of 0 the number of bins will be chosen automatically. 401 noBinsY = Optional number of bins for y axis. On a value of 0 the number of bins will be chosen automatically. 402 403 Returns: Range that holds rectangles representing different bins 404 */ 405 auto statHist2D(AES)(AES aesRange, size_t noBinsX = 0, size_t noBinsY = 0) 406 { 407 return statHistND!(2,AES)( aesRange, [noBinsX, noBinsY]); 408 } 409 410 /** 411 Calculate kernel density for given data 412 413 Params: 414 aesRange = Data that the histogram will be based on 415 416 Returns: InputRange that holds x and y coordinates for the kernel 417 */ 418 auto statDensity(AES)( AES aesRange ) 419 { 420 import std.algorithm : joiner, map, min, max, reduce; 421 import std.conv : to; 422 import std.range : chain, front; 423 import std.typecons : tuple, Tuple; 424 import ggplotd.aes : Aes, group; 425 import ggplotd.range : mergeRange; 426 import ggplotd.algorithm : safeMin, safeMax; 427 428 auto minmax = reduce!( 429 (a,b) => safeMin(a,b.x.to!double), 430 (a,b) => safeMax(a,b.x.to!double))(tuple(double.init,double.init), aesRange); 431 auto margin = (minmax[1] - minmax[0])/10.0; 432 minmax[0] -= margin; 433 minmax[1] += margin; 434 435 436 437 return aesRange.group.map!((g) { 438 auto xs = g.map!((t) => t.x.to!double); 439 440 // Calculate the kernel width (using scott thing in dstats) 441 // Initialize kernel with normal distribution. 442 import std.math : isFinite; 443 import dstats.kerneldensity : scottBandwidth, KernelDensity; 444 import dstats.distrib : normalPDF; 445 auto sigma = scottBandwidth(xs); 446 if (!isFinite(sigma) || sigma <= 0) 447 sigma = 0.5; 448 auto kernel = (double x) { return normalPDF(x, 0.0, sigma); }; 449 auto density = KernelDensity.fromCallable(kernel, xs); 450 451 // Use statFunction with the kernel to produce a line 452 // Also add points to close the path down to zero 453 auto coords = chain( 454 [Tuple!(double, "x", double, "y")( minmax[0], 0.0 )], 455 statFunction( density, minmax[0], minmax[1] ), 456 [Tuple!(double, "x", double, "y")( minmax[1], 0.0 )]); 457 458 459 return g.front.mergeRange( coords ); 460 }).joiner; 461 } 462 463 /// 464 unittest 465 { 466 import std.stdio : writeln; 467 import std.algorithm : map; 468 import std.array : array; 469 import std.random : uniform; 470 import std.range : chain, iota, repeat, walkLength; 471 472 import ggplotd.aes : Aes; 473 474 auto xs = iota(0,100,1) 475 .map!((i)=>uniform(0,0.75)+uniform(0.25,1)) 476 .array; 477 478 auto dens = statDensity( Aes!( typeof(xs), "x")( xs ) ); 479 auto dim = dens.walkLength; 480 assertGreaterThan( dim, 1 ); 481 482 // Test that grouping leads to longer (twice as long) result 483 auto cols = chain("a".repeat(50),"b".repeat(50) ); 484 auto dens2 = statDensity( Aes!(typeof(cols), "colour", typeof(xs), "x")( cols, xs ) ); 485 assertGreaterThan( dens2.walkLength, 1.9*dim ); 486 assertLessThan( dens2.walkLength, 2.1*dim ); 487 488 // Test that colour is passed through (merged) 489 assertEqual( dens2.front.colour.length, 1 ); 490 491 // Test single point 492 xs = [1]; 493 dens = statDensity( Aes!( typeof(xs), "x")( xs ) ); 494 assertEqual(dens.walkLength, 3); 495 } 496 497 /** 498 Calculate kernel density for given x and y data 499 500 Params: 501 aesRange = Data that the histogram will be based on 502 503 Returns: Range of ranges that holds polygons for the kernel 504 */ 505 auto statDensity2D(AES)( AES aesRange ) 506 { 507 import std.algorithm : joiner, map, min, max, reduce; 508 import std.array : array; 509 import std.conv : to; 510 import std.range : chain, front, iota, zip; 511 import std.meta : Erase; 512 import std.typecons : tuple, Tuple; 513 import ggplotd.aes : Aes, group, DefaultGroupFields; 514 import ggplotd.algorithm : safeMin, safeMax; 515 import ggplotd.range : mergeRange; 516 517 auto minmax = reduce!( 518 (a,b) => safeMin(a,b.x.to!double), 519 (a,b) => safeMax(a,b.x.to!double), 520 (a,b) => safeMin(a,b.y.to!double), 521 (a,b) => safeMax(a,b.y.to!double), 522 )(tuple(double.init,double.init,double.init,double.init), aesRange); 523 524 if (minmax[0] == minmax[1]) { 525 minmax[0] -= 0.5; 526 minmax[1] += 0.5; 527 } 528 if (minmax[2] == minmax[3]) { 529 minmax[2] -= 0.5; 530 minmax[3] += 0.5; 531 } 532 533 return aesRange.group!(Erase!("colour",DefaultGroupFields)).map!((g) { 534 auto xs = g.map!((t) => t.x.to!double); 535 auto ys = g.map!((t) => t.y.to!double); 536 537 import std.math : isFinite; 538 // Calculate the kernel width (using scott thing in dstats) 539 // Initialize kernel with normal distribution. 540 import dstats.kerneldensity : scottBandwidth, KernelDensity; 541 import dstats.distrib : normalPDF; 542 auto sigmaX = scottBandwidth(xs); 543 if (!isFinite(sigmaX) || sigmaX <= 0) 544 sigmaX = 1e-5; 545 auto sigmaY = scottBandwidth(ys); 546 if (!isFinite(sigmaY) || sigmaY <= 0) 547 sigmaY = 1e-5; 548 549 auto kernel = (double x, double y) { return normalPDF(x, 0.0, sigmaX)* 550 normalPDF(y, 0.0, sigmaY); }; 551 auto density = KernelDensity.fromCallable(kernel, xs, ys); 552 553 auto marginX = (minmax[1] - minmax[0])/10.0; 554 auto marginY = (minmax[3] - minmax[2])/10.0; 555 556 minmax[0] -= marginX; 557 minmax[1] += marginX; 558 minmax[2] -= marginY; 559 minmax[3] += marginY; 560 561 562 // Use statFunction with the kernel to produce a line 563 // Also add points to close the path down to zero 564 auto stepX = (minmax[1] - minmax[0])/25.0; 565 auto stepY = (minmax[3] - minmax[2])/25.0; 566 auto xCoords = iota( minmax[0], minmax[1], stepX ).array ~ minmax[1]; 567 auto yCoords = iota( minmax[2], minmax[3], stepY ).array ~ minmax[3]; 568 auto coords = zip(xCoords, xCoords[1..$]).map!( (xr) { 569 return zip(yCoords, yCoords[1..$]).map!( (yr) { 570 // Two polygons 571 return [ 572 g.front.mergeRange(Aes!( double[], "x", double[], "y", double[], "colour" ) 573 ([xr[0], xr[0], xr[1]], 574 [yr[0], yr[1], yr[1]], 575 [density(xr[0],yr[0]),density(xr[0],yr[1]),density(xr[1],yr[1])])), 576 g.front.mergeRange( 577 Aes!( double[], "x", double[], "y", double[], "colour" ) 578 ([xr[0], xr[1], xr[1]], 579 [yr[0], yr[0], yr[1]], 580 [density(xr[0],yr[0]),density(xr[1],yr[0]),density(xr[1],yr[1])])) 581 ]; 582 }).joiner; 583 }).joiner; 584 return coords; 585 //return g; 586 }).joiner; 587 } 588 589 /// 590 unittest 591 { 592 import std.algorithm : map; 593 import std.array : array; 594 import std.random : uniform; 595 import std.range : iota, walkLength; 596 597 import ggplotd.aes : Aes; 598 auto xs = iota(0,500,1).map!((x) => uniform(0.0,5)+uniform(0.0,5)) 599 .array; 600 auto ys = iota(0,500,1).map!((y) => uniform(1.0,1.5)+uniform(1.0,1.5)) 601 .array; 602 auto aes = Aes!(typeof(xs), "x", typeof(ys), "y")( xs, ys); 603 604 auto sD = statDensity2D( aes ); 605 assertGreaterThan( sD.walkLength, 1150 ); 606 assertLessThan( sD.walkLength, 1450 ); 607 assertEqual( sD.front.walkLength, 3 ); 608 609 // One value 610 xs = [1]; 611 ys = [2]; 612 aes = Aes!(typeof(xs), "x", typeof(ys), "y")( xs, ys); 613 614 sD = statDensity2D( aes ); 615 assertGreaterThan(sD.walkLength, 0); 616 }