PolyMathOrg/DataFrame | Build 13409391746 | src/DataFrame/DataFrame.class.st | Coveralls

165

DataFrame class >> new: aPoint [
	"Answer a new instance initialized with aPoint. The other constructors of this class pass numberOfRows @ numberOfColumns"

	| instance |
	instance := super new.
	^ instance initialize: aPoint
]

171

DataFrame class >> withColumnNames: anArrayOfColumnNames [
	"Answer a data frame with zero rows and one column per name in anArrayOfColumnNames"

	^ (self new: 0 @ anArrayOfColumnNames size)
		columnNames: anArrayOfColumnNames;
		yourself
]

183

DataFrame class >> withColumnNames: anArrayOfColumnNames withRowNames: anArrayOfRowNames [
	"Answer a data frame dimensioned after the two name collections, with the column names set first and the row names second"

	^ (self new: anArrayOfRowNames size @ anArrayOfColumnNames size)
		columnNames: anArrayOfColumnNames;
		rowNames: anArrayOfRowNames;
		yourself
]

198

DataFrame class >> withColumns: anArrayOfArrays [
	"Answer the result of initializing a new instance with anArrayOfArrays as its columns"

	| frame |
	frame := self new.
	^ frame initializeColumns: anArrayOfArrays
]

204

DataFrame class >> withColumns: anArrayOfArrays columnNames: anArrayOfColumnNames [
	"Answer a data frame built from the given columns and then labelled with the given column names"

	^ (self withColumns: anArrayOfArrays)
		columnNames: anArrayOfColumnNames;
		yourself
]

213

DataFrame class >> withColumns: anArrayOfArrays rowNames: anArrayOfRowNames [
	"Answer a data frame built from the given columns and labelled with the given row names. When no columns are given, answer a column-less data frame that only carries the row names"

	^ anArrayOfArrays
		ifEmpty: [ self withRowNames: anArrayOfRowNames ]
		ifNotEmpty: [ :columns |
			| frame |
			frame := self withColumns: columns.
			frame rowNames: anArrayOfRowNames.
			frame ]
]

222

DataFrame class >> withColumns: anArrayOfArrays rowNames: anArrayOfRowNames columnNames: anArrayOfColumnNames [
	"Answer a data frame built from the given columns, labelled first with the row names and then with the column names. When no columns are given, answer a column-less data frame that only carries the row names"

	^ anArrayOfArrays
		ifEmpty: [ self withRowNames: anArrayOfRowNames ]
		ifNotEmpty: [ :columns |
			| frame |
			frame := self withColumns: columns.
			frame rowNames: anArrayOfRowNames.
			frame columnNames: anArrayOfColumnNames.
			frame ]
]

232

DataFrame class >> withDataFrameInternal: aDataFrameIndernal rowNames: rows columnNames: columns [
	"Answer the result of initializing a new instance with the given contents object, row names and column names"

	| frame |
	frame := self new.
	^ frame initializeContents: aDataFrameIndernal rowNames: rows columnNames: columns
]

241

DataFrame class >> withRowNames: anArrayOfRowNames [
	"Answer a data frame with zero columns and one row per name in anArrayOfRowNames"

	^ (self new: anArrayOfRowNames size @ 0)
		rowNames: anArrayOfRowNames;
		yourself
]

253

DataFrame class >> withRowNames: anArrayOfRowNames columnNames: anArrayOfColumnNames [
	"Answer a data frame dimensioned after the two name collections, with the row names set first and the column names second"

	^ (self new: anArrayOfRowNames size @ anArrayOfColumnNames size)
		rowNames: anArrayOfRowNames;
		columnNames: anArrayOfColumnNames;
		yourself
]

269

DataFrame class >> withRows: anArrayOfArrays [
	"Answer the result of initializing a new instance with anArrayOfArrays as its rows"

	| frame |
	frame := self new.
	^ frame initializeRows: anArrayOfArrays
]

275

DataFrame class >> withRows: anArrayOfArrays columnNames: anArrayOfColumnNames [
	"Answer a data frame built from the given rows and labelled with the given column names. When no rows are given, answer a row-less data frame that only carries the column names"

	^ anArrayOfArrays
		ifEmpty: [ self withColumnNames: anArrayOfColumnNames ]
		ifNotEmpty: [ :rows |
			| frame |
			frame := self withRows: rows.
			frame columnNames: anArrayOfColumnNames.
			frame ]
]

284

DataFrame class >> withRows: anArrayOfArrays rowNames: anArrayOfRowNames [
	"Answer a data frame built from the given rows and then labelled with the given row names"

	^ (self withRows: anArrayOfArrays)
		rowNames: anArrayOfRowNames;
		yourself
]

293

DataFrame class >> withRows: anArrayOfArrays rowNames: anArrayOfRowNames columnNames: anArrayOfColumnNames [
	"Answer a data frame built from the given rows, labelled first with the row names and then with the column names. When no rows are given, answer a row-less data frame that only carries the column names"

	^ anArrayOfArrays
		ifEmpty: [ self withColumnNames: anArrayOfColumnNames ]
		ifNotEmpty: [ :rows |
			| frame |
			frame := self withRows: rows.
			frame rowNames: anArrayOfRowNames.
			frame columnNames: anArrayOfColumnNames.
			frame ]
]

303

DataFrame >> , aDataFrame [
	"Answer a copy of the receiver with every row of aDataFrame appended under its own row name. Both data frames must have equal column names and must not share any row name; otherwise an error is signalled"

	| result appendedRows |
	self columnNames ~= aDataFrame columnNames ifTrue: [ self error: 'Not yet supported.' ].
	(self rowNames includesAny: aDataFrame rowNames) ifTrue: [ self error: 'Not yet supported.' ].

	result := self copy.
	appendedRows := aDataFrame asArrayOfRows.
	aDataFrame rowNames withIndexDo: [ :rowName :position |
		result addRow: (appendedRows at: position) named: rowName ].

	^ result
]

317

DataFrame >> = aDataFrame [
	"Answer true when aDataFrame has the same species, dimensions, row names, column names and contents as the receiver. The checks run in that order so that the comparison of contents is only reached when everything else matches"

	^ aDataFrame species = self species and: [
		  aDataFrame dimensions = self dimensions and: [
			  aDataFrame rowNames = self rowNames and: [
				  aDataFrame columnNames = self columnNames and: [
					  aDataFrame contents = self contents ] ] ] ]
]

336

DataFrame >> add: aDataSeries [
	"Append aDataSeries as the last row by delegating to addRow:. The flag: send records an open question about whether this selector should be kept"

	self flag: 'This mathod name is not correct. It is misleading. We should think if we should delete it or keep it'.
	self addRow: aDataSeries
]

346

DataFrame >> addColumn: aDataSeries [
	"Append aDataSeries as the last column, named after the series, and record the data type calculated by the series under that name"

	| columnName |
	columnName := aDataSeries name.
	self addColumn: aDataSeries named: columnName.
	self dataTypes at: columnName put: aDataSeries calculateDataType
]

360

DataFrame >> addColumn: aDataSeries atPosition: aNumber [
	"Insert the values of aDataSeries as a new column at position aNumber, named after the series"

	| values |
	values := aDataSeries asArray.
	self addColumn: values named: aDataSeries name atPosition: aNumber
]

374

DataFrame >> addColumn: anArray named: aString [
	"Append anArray as a new column called aString, placed after the current last column"

	| lastPosition |
	lastPosition := self numberOfColumns + 1.
	self addColumn: anArray named: aString atPosition: lastPosition
]

380

DataFrame >> addColumn: anArray named: aString atPosition: aNumber [
	"Insert anArray as a new column called aString at position aNumber. Signals an Error when a column with that name already exists. The contents, the column names and the data types are all updated"

	| nameTaken values |
	nameTaken := self columnNames includes: aString.
	nameTaken ifTrue: [ Error signal: 'A column with that name already exists' ].

	values := anArray asArray.
	contents addColumn: values atPosition: aNumber.
	columnNames add: aString afterIndex: aNumber - 1.
	dataTypes at: aString put: anArray asDataSeries calculateDataType
]

391

DataFrame >> addEmptyColumnNamed: aString [
	"Append a column called aString filled with nil, placed after the current last column"

	| lastPosition |
	lastPosition := self numberOfColumns + 1.
	self addEmptyColumnNamed: aString atPosition: lastPosition
]

397

DataFrame >> addEmptyColumnNamed: aString atPosition: aNumber [
	"Insert at position aNumber a column called aString holding one nil per existing row"

	| blankColumn |
	blankColumn := Array new: self numberOfRows.
	self addColumn: blankColumn named: aString atPosition: aNumber
]

403

DataFrame >> addEmptyRowNamed: aString [
	"Append a row called aString filled with nil, placed after the current last row"

	| lastPosition |
	lastPosition := self numberOfRows + 1.
	self addEmptyRowNamed: aString atPosition: lastPosition
]

409

DataFrame >> addEmptyRowNamed: aString atPosition: aNumber [
	"Insert at position aNumber a row called aString holding one nil per existing column"

	| blankRow |
	blankRow := Array new: self numberOfColumns.
	self addRow: blankRow named: aString atPosition: aNumber
]

415

DataFrame >> addRow: aDataSeries [
	"Append aDataSeries as a new row placed after the current last row"

	| lastPosition |
	lastPosition := self numberOfRows + 1.
	self addRow: aDataSeries atPosition: lastPosition
]

426

DataFrame >> addRow: aDataSeries atPosition: aNumber [
	"Insert aDataSeries as a new row at position aNumber, named after the series. For every column the value is looked up in the series by column name; when the series has no such key, the value at the same index is used instead"

	| names values |
	names := self columnNames.
	values := (1 to: names size) collect: [ :index |
		          aDataSeries
			          at: (names at: index)
			          ifAbsent: [ aDataSeries atIndex: index ] ].
	self addRow: values named: aDataSeries name atPosition: aNumber
]

445

DataFrame >> addRow: anArray named: aString [
	"Append anArray as a new row called aString, placed after the current last row"

	| lastPosition |
	lastPosition := self numberOfRows + 1.
	self addRow: anArray named: aString atPosition: lastPosition
]

451

DataFrame >> addRow: anArray named: aString atPosition: aNumber [
	"Insert anArray as a new row called aString at position aNumber. Signals an Error when a row with that name already exists"

	| nameTaken |
	nameTaken := self rowNames includes: aString.
	nameTaken ifTrue: [ Error signal: 'A row with that name already exists' ].

	contents addRow: anArray atPosition: aNumber.
	rowNames add: aString afterIndex: aNumber - 1
]

461

DataFrame >> applyElementwise: aBlock [
	"Apply aBlock elementwise to all columns by delegating to toColumns:applyElementwise: with every column name"

	| everyColumn |
	everyColumn := self columnNames.
	self toColumns: everyColumn applyElementwise: aBlock
]

480

DataFrame >> applyToAllColumns: aSymbol [
	"Send the unary selector aSymbol to every column and answer a DataSeries of the results, named aSymbol and keyed by the column names. Used by the statistical functions of DataFrame"

	| names results |
	names := self columnNames.
	results := names collect: [ :each | (self column: each) perform: aSymbol ].

	^ (DataSeries withValues: results)
		  name: aSymbol;
		  keys: names;
		  yourself
]

495

5✔

496

{ #category : 'converting' }

5✔

497

DataFrame >> asArray [
	"Answer the receiver as an array of rows; this is the same answer as asArrayOfRows"

	"(#(#(1 2) #(3 4)) asDataFrame asArray) >>> (#(#(1 2) #(3 4)))"

	| rows |
	rows := self asArrayOfRows.
	^ rows
]

506

5✔

507

{ #category : 'converting' }

5✔

508

DataFrame >> asArrayOfColumns [
	"Answer the values of the receiver grouped by column, as provided by the contents object"

	"(#(#(1 2) #(3 4)) asDataFrame asArrayOfColumns) >>> (#(#(1 3) #(2 4)))"

	| columnArrays |
	columnArrays := contents asArrayOfColumns.
	^ columnArrays
]

517

5✔

518

{ #category : 'converting' }

5✔

519

DataFrame >> asArrayOfRows [
	"Answer the values of the receiver grouped by row, as provided by the contents object"

	"(#(#(1 2) #(3 4)) asDataFrame asArrayOfRows) >>> (#(#(1 2) #(3 4)))"

	| rowArrays |
	rowArrays := contents asArrayOfRows.
	^ rowArrays
]

528

5✔

529

{ #category : 'converting' }

5✔

530

DataFrame >> asArrayOfRowsWithName [
	"Answer a collection with one Array per row. Each Array starts with the name of the row and continues with the values of that row"

	^ self rowNames withIndexCollect: [ :rowName :position |
		  | row |
		  row := self at: position.
		  Array streamContents: [ :out |
			  out nextPut: rowName.
			  out nextPutAll: row ] ]
]

541

5✔

542

{ #category : 'accessing' }

5✔

543

DataFrame >> at: aNumber [
	"Answer the row at index aNumber; this is the same answer as rowAt:"

	"(#(#(1 2) #(3 4)) asDataFrame at: 1) >>> (#(1 2) asDataSeries)"

	| row |
	row := self rowAt: aNumber.
	^ row
]

552

5✔

553

{ #category : 'accessing' }

5✔

554

DataFrame >> at: rowNumber at: columnNumber [
	"Answer the value stored in the contents at row index rowNumber and column index columnNumber"

	"(#(#(1 2) #(3 4)) asDataFrame at: 1 at: 1) >>> 1"

	| cell |
	cell := contents at: rowNumber at: columnNumber.
	^ cell
]

563

5✔

564

{ #category : 'accessing' }

5✔

565

DataFrame >> at: rowNumber at: columnNumber put: value [
	"Store value in the contents at row index rowNumber and column index columnNumber. Answers the receiver"

	"(#(#(1 2) #(3 4)) asDataFrame at: 1 at: 1 put: 5) >>> (#(#(5 2) #(3 4)) asDataFrame)"

	contents
		at: rowNumber
		at: columnNumber
		put: value
]

574

5✔

575

{ #category : 'accessing' }

5✔

576

DataFrame >> at: rowIndex at: columnIndex transform: aBlock [
	"Replace the value at rowIndex and columnIndex with the result of evaluating aBlock on the current value"

	"(#(#(1 2) #(3 4)) asDataFrame at: 1 at: 1 transform: [ :x | x - 1 ]) >>> (#(#(0 2) #(3 4)) asDataFrame)"

	self
		at: rowIndex
		at: columnIndex
		put: (aBlock value: (self at: rowIndex at: columnIndex))
]

585

5✔

586

{ #category : 'accessing' }

5✔

587

DataFrame >> at: aNumber transform: aBlock [
	"Transform the row at index aNumber with aBlock; this is the same answer as rowAt:transform:"

	"(#(#(1 2) #(3 4)) asDataFrame at: 1 transform: [ :x | x - 1 ]) >>> (#(#(0 1) #(3 4)) asDataFrame)"

	| answer |
	answer := self rowAt: aNumber transform: aBlock.
	^ answer
]

594

5✔

595

{ #category : 'accessing' }

5✔

596

DataFrame >> atAll: indexes [
	"Answer the rows at the given indexes; this is the same answer as rowsAt:. Provided for polymorphism with other collections"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame atAll: #(1 3)) >>> (#(#(1 2) #(5 6)) asDataFrame)"

	| selection |
	selection := self rowsAt: indexes.
	^ selection
]

605

5✔

606

{ #category : 'statistics' }

5✔

607

DataFrame >> average [
	"Answer the result of sending #average to every column, as collected by applyToAllColumns:. The average is the ratio of the sum of the values to the number of values"

	| averages |
	averages := self applyToAllColumns: #average.
	^ averages
]

614

5✔

615

{ #category : 'data-types' }

5✔

616

DataFrame >> calculateDataTypes [
	"Recalculate the data type of every column and store it in dataTypes under the name of that column"

	self asArrayOfColumns withIndexDo: [ :values :position |
		| columnName |
		columnName := self columnNames at: position.
		self dataTypes at: columnName put: values calculateDataType ]
]

623

5✔

624

{ #category : 'comparing' }

5✔

625

DataFrame >> closeTo: aDataFrame [
	"Answer true when aDataFrame has the same species, dimensions and names as the receiver and every pair of cells matches. Cells whose value in the receiver is a number are compared with closeTo:, all other cells with ="

	"(#(#(1 2) #(3 4)) asDataFrame closeTo: #(#(1.0001 1.9999) #(3 4.0001)) asDataFrame ) >>> true"

	"(#(#(1 2) #(3 4)) asDataFrame closeTo: #(#(1 1) #(3 4)) asDataFrame ) >>> false"

	aDataFrame species = self species ifFalse: [ ^ false ].
	aDataFrame dimensions = self dimensions ifFalse: [ ^ false ].
	aDataFrame rowNames = self rowNames ifFalse: [ ^ false ].
	aDataFrame columnNames = self columnNames ifFalse: [ ^ false ].

	1 to: self numberOfRows do: [ :rowIndex |
		1 to: self numberOfColumns do: [ :columnIndex |
			| mine theirs matches |
			mine := self at: rowIndex at: columnIndex.
			theirs := aDataFrame at: rowIndex at: columnIndex.
			matches := mine isNumber
				           ifTrue: [ mine closeTo: theirs ]
				           ifFalse: [ mine = theirs ].
			matches ifFalse: [ ^ false ] ] ].

	^ true
]

648

5✔

649

{ #category : 'comparing' }

5✔

650

DataFrame >> closeTo: aDataFrame precision: epsilon [
	"Answer true when aDataFrame has the same species, dimensions and names as the receiver and every pair of cells matches. Cells whose value in the receiver is a number are compared with closeTo:precision: using epsilon, all other cells with ="

	"(#(#(1 2) #(3 4)) asDataFrame closeTo: #(#(1.2 2.19) #(3 4)) asDataFrame precision: 0.2 ) >>> true"

	"(#(#(1 2) #(3 4)) asDataFrame closeTo: #(#(1.21 2) #(3 4)) asDataFrame precision: 0.2 ) >>> false"

	aDataFrame species = self species ifFalse: [ ^ false ].
	aDataFrame dimensions = self dimensions ifFalse: [ ^ false ].
	aDataFrame rowNames = self rowNames ifFalse: [ ^ false ].
	aDataFrame columnNames = self columnNames ifFalse: [ ^ false ].

	1 to: self numberOfRows do: [ :rowIndex |
		1 to: self numberOfColumns do: [ :columnIndex |
			| mine theirs matches |
			mine := self at: rowIndex at: columnIndex.
			theirs := aDataFrame at: rowIndex at: columnIndex.
			matches := mine isNumber
				           ifTrue: [ mine closeTo: theirs precision: epsilon ]
				           ifFalse: [ mine = theirs ].
			matches ifFalse: [ ^ false ] ] ].

	^ true
]

672

5✔

673

{ #category : 'enumerating' }

5✔

674

DataFrame >> collect: aBlock [
	"Answer a new data frame made of the results of evaluating aBlock with a copy of each element enumerated by do:. The columns of the answer are taken from the keys of the result for the first row.
	An empty receiver has no first row to evaluate, so it answers an empty data frame carrying the column names of the receiver instead of asking for rowAt: 1"

	| firstRow newDataFrame |
	self numberOfRows = 0 ifTrue: [
		^ self class withColumnNames: self columnNames ].

	firstRow := aBlock value: (self rowAt: 1) copy.
	newDataFrame := self class new: 0 @ firstRow size.
	newDataFrame columnNames: firstRow keys.

	"aBlock is evaluated once more for the first row below; the evaluation above only discovers the columns"
	self do: [ :each | newDataFrame add: (aBlock value: each copy) ].
	^ newDataFrame
]

685

5✔

686

{ #category : 'enumerating' }

5✔

687

DataFrame >> collectWithIndex: aBlock [
	"Answer a new data frame made of the results of evaluating aBlock with a copy of each element enumerated by doWithIndex: and its index. The columns of the answer are taken from the keys of the result for the first row.
	An empty receiver has no first row to evaluate, so it answers an empty data frame carrying the column names of the receiver instead of asking for rowAt: 1"

	| firstRow newDataFrame |
	self numberOfRows = 0 ifTrue: [
		^ self class withColumnNames: self columnNames ].

	firstRow := aBlock value: (self rowAt: 1) copy value: 1.
	newDataFrame := self class new: 0 @ firstRow size.
	newDataFrame columnNames: firstRow keys.

	"aBlock is evaluated once more for the first row below; the evaluation above only discovers the columns"
	self doWithIndex: [ :each :index |
		newDataFrame add: (aBlock value: each copy value: index) ].
	^ newDataFrame
]

698

5✔

699

{ #category : 'accessing' }

5✔

700

DataFrame >> column: columnName [
	"Answer the column called columnName as a DataSeries. The name is resolved with indexOfColumnNamed:, which is expected to signal an exception for an unknown name"

	^ self columnAt: (self indexOfColumnNamed: columnName)
]

706

5✔

707

{ #category : 'accessing' }

5✔

708

DataFrame >> column: columnName ifAbsent: exceptionBlock [
	"Answer the column called columnName as a DataSeries, or the value of exceptionBlock when there is no column with that name"

	^ self columnAt: (self
			   indexOfColumnNamed: columnName
			   ifAbsent: [ ^ exceptionBlock value ])
]

717

5✔

718

{ #category : 'accessing' }

5✔

719

DataFrame >> column: columnName put: anArray [
	"Replace the values of the column called columnName with anArray. The name is resolved with indexOfColumnNamed:, which is expected to signal an exception for an unknown name"

	^ self columnAt: (self indexOfColumnNamed: columnName) put: anArray
]

725

5✔

726

{ #category : 'accessing' }

5✔

727

DataFrame >> column: columnName put: anArray ifAbsent: exceptionBlock [
	"Replace the values of the column called columnName with anArray, or answer the value of exceptionBlock when there is no column with that name"

	| position |
	position := self
		            indexOfColumnNamed: columnName
		            ifAbsent: [ ^ exceptionBlock value ].
	^ self columnAt: position put: anArray
]

736

5✔

737

{ #category : 'accessing' }

5✔

738

DataFrame >> column: columnName transform: aBlock [
	"Replace the column called columnName with the result of evaluating aBlock on it, converted to an array. An unknown column name is reported by column:"

	| transformed |
	transformed := aBlock value: (self column: columnName).
	self column: columnName put: transformed asArray
]

744

5✔

745

{ #category : 'accessing' }

5✔

746

DataFrame >> column: columnName transform: aBlock ifAbsent: exceptionBlock [
	"Replace the column called columnName with the result of evaluating aBlock on it, or answer the value of exceptionBlock when there is no column with that name.
	The result of aBlock is converted with asArray before it is stored, exactly as column:transform: and columnAt:transform: do, so that a block answering a DataSeries is stored by position rather than by key"

	| column |
	column := self column: columnName ifAbsent: [ ^ exceptionBlock value ].
	self column: columnName put: (aBlock value: column) asArray
]

752

5✔

753

{ #category : 'accessing' }

5✔

754

DataFrame >> columnAt: aNumber [
	"Answer the column at index aNumber as a DataSeries keyed by the row names and named after the column"

	"(#(#(1 2) #(5 6)) asDataFrame columnAt: 2) >>> (#(2 6) asDataSeries) "

	| series |
	series := DataSeries
		          withKeys: self rowNames
		          values: (contents columnAt: aNumber).
	series name: (self columnNames at: aNumber).
	^ series
]

767

5✔

768

{ #category : 'accessing' }

5✔

769

DataFrame >> columnAt: aNumber put: anArray [
	"Replace the values of the column at index aNumber with anArray. Signals SizeMismatch when anArray does not hold one value per row"

	"(#(#(1 2) #(3 4)) asDataFrame columnAt: 2 put: #(5 6)) >>> (#(#(1 5) #(3 6)) asDataFrame) "

	anArray size ~= self numberOfRows ifTrue: [ SizeMismatch signal ].

	contents columnAt: aNumber put: anArray
]

780

5✔

781

{ #category : 'accessing' }

5✔

782

DataFrame >> columnAt: aNumber transform: aBlock [
	"Replace the column at index aNumber with the result of evaluating aBlock on it, converted to an array"

	"(#(#(1 2) #(3 4)) asDataFrame columnAt: 2 transform: [ :x | x / 2 ]) >>> (#(#(1 1) #(3 2)) asDataFrame) "

	| transformed |
	transformed := aBlock value: (self columnAt: aNumber).
	self columnAt: aNumber put: transformed asArray
]

791

5✔

792

{ #category : 'accessing' }

5✔

793

DataFrame >> columnNames [
	"Answer the collection held in the columnNames instance variable. It is the stored collection itself, not a copy"

	^ columnNames
]

798

5✔

799

{ #category : 'accessing' }

5✔

800

DataFrame >> columnNames: aCollection [
	"Set the column names to the elements of aCollection, stored as an OrderedCollection.
	Signals SizeMismatch when the number of names differs from the number of columns, and Error when the names are not distinct.
	When column names were already set, the data types recorded under the old names are moved to the new names, position by position"

	| newNames |
	aCollection size = self numberOfColumns
		ifFalse: [ SizeMismatch signal: 'Wrong number of column names' ].

	aCollection asSet size = aCollection size
		ifFalse: [ Error signal: 'All column names must be distinct' ].

	newNames := aCollection asOrderedCollection.

	self columnNames ifNotNil: [ :oldNames |
		| types |
		"Read and detach every type before inserting any new key. Renaming one key at a time loses or overwrites entries when a new name equals an old name at another position, for example when two names are swapped or shifted"
		types := oldNames collect: [ :oldName | dataTypes at: oldName ].
		oldNames do: [ :oldName | dataTypes removeKey: oldName ].
		oldNames withIndexDo: [ :oldName :i |
			dataTypes at: (newNames at: i) put: (types at: i) ] ].

	columnNames := newNames
]

818

5✔

819

{ #category : 'accessing' }

5✔

820

DataFrame >> columns [
	"Answer a collection holding every column, in index order, each one as answered by columnAt:"

	"(#(#(1 2) #(3 4)) asDataFrame columns) >>> (#( #(1 3) #(2 4) ) collect: #asDataSeries) "

	| indices |
	indices := 1 to: self numberOfColumns.
	^ indices collect: [ :each | self columnAt: each ]
]

829

5✔

830

{ #category : 'accessing' }

5✔

831

DataFrame >> columns: anArrayOfNames [
	"Answer the columns whose names are listed in anArrayOfNames, by resolving every name to its index and delegating to columnsAt:"

	^ self columnsAt:
		  (anArrayOfNames collect: [ :each | self indexOfColumnNamed: each ])
]

842

5✔

843

{ #category : 'accessing' }

5✔

844

DataFrame >> columns: anArrayOfColumnNames put: anArrayOfArrays [
	"Replace each column named in anArrayOfColumnNames with the array at the same position in anArrayOfArrays. Signals SizeMismatch when the two collections differ in size"

	anArrayOfArrays size ~= anArrayOfColumnNames size ifTrue: [
		SizeMismatch signal ].

	anArrayOfColumnNames
		with: anArrayOfArrays
		do: [ :columnName :values | self column: columnName put: values ]
]

853

5✔

854

{ #category : 'accessing' }

5✔

855

DataFrame >> columnsAllBut: aCollectionOfColumnNames [
	"Answer all the columns except those whose names are listed in aCollectionOfColumnNames"

	| keptNames |
	keptNames := self columnNames copyWithoutAll: aCollectionOfColumnNames.
	^ self columns: keptNames
]

860

5✔

861

{ #category : 'accessing' }

5✔

862

DataFrame >> columnsAt: anArrayOfNumbers [
	"Answer a new DataFrame holding the columns at the indices listed in anArrayOfNumbers, with the row names of the receiver and the matching column names"

	"(#(#(1 2 3) #(4 5 6)) asDataFrame columnsAt: #(1 3)) >>> (#(#(1 3) #(4 6)) asDataFrame)"

	| selectedNames selectedContents |
	selectedNames := anArrayOfNumbers collect: [ :each |
		                 self columnNames at: each ].
	selectedContents := self contents columnsAt: anArrayOfNumbers.

	^ DataFrame
		  withDataFrameInternal: selectedContents
		  rowNames: self rowNames
		  columnNames: selectedNames
]

878

5✔

879

{ #category : 'accessing' }

5✔

880

DataFrame >> columnsAt: anArrayOfNumbers put: anArrayOfArrays [
	"Replace each column whose index is listed in anArrayOfNumbers with the array at the same position in anArrayOfArrays. Signals SizeMismatch when the two collections differ in size"

	"(#(#(1 2 3) #(4 5 6)) asDataFrame columnsAt: #(1 3) put: #(#(10 40) #(30 60))) >>> (#(#(10 2 30) #(40 5 60)) asDataFrame)"

	anArrayOfArrays size ~= anArrayOfNumbers size ifTrue: [
		SizeMismatch signal ].

	anArrayOfNumbers
		with: anArrayOfArrays
		do: [ :position :values | self columnAt: position put: values ]
]

892

5✔

893

{ #category : 'accessing' }

5✔

894

DataFrame >> columnsFrom: begin to: end [
	"Answer the columns whose indices run from begin to end, both included. When begin is greater than end the columns are answered in descending index order"

	"(#(#(1 2 3) #(4 5 6)) asDataFrame columnsFrom: 1 to: 2)  >>> (#(#(1 2) #(4 5)) asDataFrame)"

	| indices |
	indices := begin >= end
		           ifTrue: [ (end to: begin) asArray reverse ]
		           ifFalse: [ (begin to: end) asArray ].

	^ self columnsAt: indices
]

908

5✔

909

{ #category : 'accessing' }

5✔

910

DataFrame >> columnsFrom: firstNumber to: secondNumber put: anArrayOfArrays [
	"Overwrite the columns whose indices lie between firstNumber and secondNumber (both included) with the elements of anArrayOfArrays, in the order the indices are walked (downwards when secondNumber is smaller than firstNumber). Signals SizeMismatch when anArrayOfArrays does not hold exactly one element per index."

	"(#(#(1 2 3) #(4 5 6)) asDataFrame columnsFrom: 1 to: 2 put:#(#(7 8) #(9 10)))  >>> (#(#(7 9 3) #(8 10 6)) asDataFrame)"

	| columnIndices |
	(firstNumber - secondNumber) abs + 1 = anArrayOfArrays size
		ifFalse: [ SizeMismatch signal ].

	columnIndices := secondNumber >= firstNumber
		ifTrue: [ firstNumber to: secondNumber ]
		ifFalse: [ (secondNumber to: firstNumber) reversed ].

	columnIndices withIndexDo: [ :columnIndex :position |
		self columnAt: columnIndex put: (anArrayOfArrays at: position) ]
]

926

5✔

927

{ #category : 'accessing' }

5✔

928

DataFrame >> contents [
	"Answer the object stored in the contents instance variable, which holds all the values of the data frame"

	^ contents
]

933

5✔

934

{ #category : 'copying' }

5✔

935

DataFrame >> copyReplace: missingValue in2DCollectionBy: arrayOfReplacementValues [
	"Answer a copy of the receiver in which every cell equal to missingValue is replaced by the element of arrayOfReplacementValues located at the column index of that cell. The receiver itself is left untouched.

	This method is needed by the project pharo-ai/data-imputers. That project could work without it, but replacing the missing values would then take far longer."

	| copy |
	copy := self copy.
	1 to: self numberOfColumns do: [ :columnIndex |
		| replacementValue |
		replacementValue := arrayOfReplacementValues at: columnIndex.
		1 to: self numberOfRows do: [ :rowIndex |
			(self at: rowIndex at: columnIndex) = missingValue ifTrue: [
				"Write into the copy made above. Sending #copy to self here would build a fresh throwaway data frame on every hit and the answered copy would never change"
				copy at: rowIndex at: columnIndex put: replacementValue ] ] ].
	^ copy
]

948

5✔

949

{ #category : 'statistics' }

5✔

950

DataFrame >> correlationMatrix [
	"Answer the correlation matrix of the receiver (every column correlated with every column), computed with Pearson's correlation coefficient"

	^ self correlationMatrixUsing: DataPearsonCorrelationMethod
]

954

5✔

955

{ #category : 'statistics' }

5✔

956

DataFrame >> correlationMatrixUsing: aCorrelationCoefficient [
	"Answer a data frame whose rows and columns are both named after the numerical columns of the receiver and whose cells hold the correlation between the two corresponding columns, computed with aCorrelationCoefficient. Signals an Error when the receiver has no numerical column."

	| numericNames matrix |
	numericNames := self columnNames select: [ :each |
		(self column: each) isNumerical ].

	numericNames ifEmpty: [
		Error signal: 'This data frame does not have any numerical columns' ].

	matrix := self class
		withRowNames: numericNames
		columnNames: numericNames.

	1 to: numericNames size do: [ :i |
		"Only the cells below the diagonal are computed; each value is mirrored above it"
		1 to: i - 1 do: [ :j |
			| coefficient |
			coefficient := (self column: (numericNames at: i))
				correlationWith: (self column: (numericNames at: j))
				using: aCorrelationCoefficient.
			matrix at: i at: j put: coefficient.
			matrix at: j at: i put: coefficient ].
		"The diagonal is set to 1 without any computation"
		matrix at: i at: i put: 1 ].

	^ matrix
]

985

5✔

986

{ #category : 'accessing' }

5✔

987

DataFrame >> crossTabulate: colName1 with: colName2 [
	"Answer the cross tabulation of the column named colName1 with the column named colName2"

	^ (self column: colName1) crossTabulateWith: (self column: colName2)
]

997

5✔

998

{ #category : 'copying' }

5✔

999

DataFrame >> dataPreProcessingEncodeWith: anEncoder [
	"Answer a copy of the receiver in which every cell is replaced by the position of its value among the categories that anEncoder lists for the column of that cell. AIMissingCategory is signalled for a value that is not among them.
	This method is here to speed up pharo-ai/data-preprocessing algos without coupling both projects."

	| encoded indexByCategoryPerColumn |
	encoded := self copy.
	indexByCategoryPerColumn := IdentityDictionary new.
	self columns doWithIndex: [ :series :columnIndex |
		| indexByCategory |
		"Dictionary mapping each category of this column to its position in the encoder's list"
		indexByCategory := indexByCategoryPerColumn
			at: columnIndex
			ifAbsentPut: [
				((anEncoder categories at: columnIndex) collectWithIndex: [ :category :position |
					category -> position ]) asDictionary ].
		series doWithIndex: [ :value :rowIndex |
			encoded
				at: rowIndex
				at: columnIndex
				put: (indexByCategory at: value ifAbsent: [ AIMissingCategory signalFor: value ]) ] ].

	^ encoded
]

1013

5✔

1014

{ #category : 'data-types' }

5✔

1015

DataFrame >> dataTypeOfColumn: aColumnName [
	"Answer the data type recorded in dataTypes for the column named aColumnName"

	^ dataTypes at: aColumnName
]

1020

5✔

1021

{ #category : 'data-types' }

5✔

1022

DataFrame >> dataTypeOfColumn: aColumnName put: aDataType [
	"Record aDataType as the data type of the column named aColumnName, replacing the one recorded before"

	dataTypes at: aColumnName put: aDataType
]

1027

5✔

1028

{ #category : 'data-types' }

5✔

1029

DataFrame >> dataTypeOfColumnAt: aNumber [
	"Answer the data type of the column located at index aNumber"

	| columnName |
	columnName := columnNames at: aNumber.
	^ self dataTypeOfColumn: columnName
]

1034

5✔

1035

{ #category : 'data-types' }

5✔

1036

DataFrame >> dataTypeOfColumnAt: aNumber put: aDataType [
	"Record aDataType as the data type of the column located at index aNumber, replacing the one recorded before"

	| columnName |
	columnName := columnNames at: aNumber.
	^ self dataTypeOfColumn: columnName put: aDataType
]

1041

5✔

1042

{ #category : 'accessing' }

5✔

1043

DataFrame >> dataTypes [
	"Answer the collection that associates each column name with the data type of that column"

	^ dataTypes
]

1048

5✔

1049

{ #category : 'accessing' }

5✔

1050

DataFrame >> dataTypes: anObject [
	"Replace the whole collection of column data types by anObject. No check is performed on anObject."

	dataTypes := anObject
]

1054

5✔

1055

{ #category : 'accessing' }

5✔

1056

DataFrame >> defaultHeadTailSize [
	"Answer the number of rows that head shows when it is not given an explicit count (and, judging by the selector, presumably tail as well)"

	^ 5
]

1060

5✔

1061

{ #category : 'statistics' }

5✔

1062

DataFrame >> describe [
	"Answer a new data frame with one row per numerical column of the receiver. The columns of the answer hold: count of non-nil values, average, standard deviation, minimum, the three quartiles, maximum and calculated data type."

	| statistics |
	statistics := self numericalColumns collect: [ :each |
		{
			each countNonNils.
			each average.
			each stdev.
			each min.
			each firstQuartile.
			each secondQuartile.
			each thirdQuartile.
			each max.
			each calculateDataType } ].

	^ self class
		withRows: statistics
		rowNames: self numericalColumnNames
		columnNames: #( count mean std min '25%' '50%' '75%' max dtype )
]

1083

5✔

1084

{ #category : 'accessing' }

5✔

1085

DataFrame >> dimensions [
	"Answer a point whose x is the number of rows and whose y is the number of columns of the receiver"

	"(#(#(1 2) #(3 4)) asDataFrame dimensions) >>> (2@2)"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame dimensions) >>> (3@2)"

	"(#(#(1 2 3) #(4 5 6)) asDataFrame dimensions) >>> (2@3)"

	| rowCount columnCount |
	rowCount := self numberOfRows.
	columnCount := self numberOfColumns.
	^ rowCount @ columnCount
]

1096

5✔

1097

{ #category : 'enumerating' }

5✔

1098

DataFrame >> do: aBlock [
	"Evaluate aBlock once for every data entry, that is for every row of the receiver, from the first row to the last"

	1 to: self numberOfRows do: [ :rowIndex |
		| currentRow |
		currentRow := self rowAt: rowIndex.
		aBlock value: currentRow.
		"A hack: the row is stored back so that modifications made to it inside aBlock end up in the data frame"
		self rowAt: rowIndex put: currentRow asArray ]
]

1110

DataFrame >> findAll: anObject atColumn: columnName [
	"Answer the names of the rows whose value in the column named columnName is equal to anObject"

	^ self rowNames select: [ :rowName |
		| cell |
		cell := (self column: columnName) at: rowName.
		cell = anObject ]
]

1117

DataFrame >> findAllIndicesOf: anObject atColumn: columnName [
	"Answer an OrderedCollection with the indices of the rows whose value in the column named columnName is equal to anObject. The value of a row is looked up through its row name."

	| matchingIndices |
	matchingIndices := OrderedCollection new.
	self rowNames withIndexDo: [ :rowName :rowIndex |
		| cell |
		cell := (self column: columnName) at: rowName.
		cell = anObject ifTrue: [ matchingIndices add: rowIndex ] ].
	^ matchingIndices
]

1126

DataFrame >> first [
	"Answer the first row of the receiver, i.e. the element found at index 1"

	"(#(#(1 2) #(3 4)) asDataFrame first) >>> (#(1 2) asDataSeries)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame first) >>> (#(r1c1 r1c2) asDataSeries)"

	^ self at: 1
]

1137

DataFrame >> firstQuartile [
	"Answer the first quartile of every column, as computed by applyToAllColumns:. 25% of the values of a set are smaller than or equal to the first quartile of that set."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame firstQuartile) >>> (Dictionary newFrom: {(1 -> 10).(2 -> 1)})"

	^ self applyToAllColumns: #firstQuartile
]

1146

DataFrame >> getJointColumnsWith: aDataFrame [
	"Answer an OrderedCollection with the column names of the receiver followed by the column names of aDataFrame. A name present in both data frames gets the suffix '_x' where it comes from the receiver and '_y' where it comes from aDataFrame."

	| sharedNames jointNames |
	sharedNames := (self columnNames intersection: aDataFrame columnNames) asSet.
	jointNames := OrderedCollection new.

	self columnNames do: [ :each |
		jointNames add: ((sharedNames includes: each)
			ifTrue: [ each , '_x' ]
			ifFalse: [ each ]) ].

	aDataFrame columnNames do: [ :each |
		jointNames add: ((sharedNames includes: each)
			ifTrue: [ each , '_y' ]
			ifFalse: [ each ]) ].

	^ jointNames
]

1167

DataFrame >> group: anAggregateColumnName by: aGroupColumnName aggregateUsing: aBlock [
	"Group the values of the column named anAggregateColumnName by the unique values of the column named aGroupColumnName and aggregate each group with aBlock. The result keeps the name anAggregateColumnName."

	^ self
		group: anAggregateColumnName
		by: aGroupColumnName
		aggregateUsing: aBlock
		as: anAggregateColumnName
]

1174

DataFrame >> group: anAggregateColumnName by: aGroupColumnName aggregateUsing: aBlock as: aNewColumnName [
	"Group the values of the column named anAggregateColumnName by the unique values of the column named aGroupColumnName and aggregate each group with aBlock. The result is named aNewColumnName. Signals an Error when both column names are equal."

	| keys values |
	aGroupColumnName = anAggregateColumnName ifTrue: [
		Error signal: 'Can not group a column by itself' ].

	keys := self column: aGroupColumnName.
	values := self column: anAggregateColumnName.

	^ values groupBy: keys aggregateUsing: aBlock as: aNewColumnName
]

1189

DataFrame >> groupBy: columnName aggregate: anArrayOfUsingAsStatements [
	"Evaluate every two-argument block of anArrayOfUsingAsStatements with the receiver and columnName, then answer a data frame made of the resulting columns. Rows are named after the keys of the first result and columns after the name of each result."

	| results |
	results := anArrayOfUsingAsStatements collect: [ :each |
		each value: self value: columnName ].

	^ DataFrame
		withColumns: results
		rowNames: results first keys
		columnNames: (results collect: [ :each | each name ])
]

1203

DataFrame >> hasNils [
	"Answer true as soon as one cell of the receiver is nil, false when no cell is nil"

	"(#(#(nil 2) #(nil 4)) asDataFrame hasNils) >>> true"

	"(#(#('nil' 'nil') #('nil' 'nil')) asDataFrame hasNils) >>> false"

	"(#(#(nil 'nil') #('nil' 'nil')) asDataFrame hasNils) >>> true"

	| columnArrays |
	columnArrays := self asArrayOfColumns.
	1 to: self numberOfColumns do: [ :columnIndex |
		1 to: self numberOfRows do: [ :rowIndex |
			| cell |
			cell := (columnArrays at: columnIndex) at: rowIndex.
			cell ifNil: [ ^ true ] ] ].
	^ false
]

1221

DataFrame >> hasNilsByColumn [
	"Answer a dictionary that associates each column name with the answer of hasNil for that column"

	"(#(#(1 2) #(nil 4)) asDataFrame hasNilsByColumn) >>> (Dictionary newFrom: {(1 -> true).(2 -> false)})"

	"(#(#('nil' 'nil') #('nil' 'nil')) asDataFrame hasNilsByColumn) >>> (Dictionary newFrom: {(1 -> false).(2 -> false)})"

	"(#(#(nil 'nil') #('nil' 'nil')) asDataFrame hasNilsByColumn) >>> (Dictionary newFrom: {(1 -> true).(2 -> false)})"

	| nilPresence |
	nilPresence := Dictionary new.
	self columnNames do: [ :columnName |
		| column |
		column := self column: columnName.
		nilPresence at: columnName put: column hasNil ].
	^ nilPresence
]

1238

DataFrame >> head [
	"Answer the first rows of the receiver. Their number is given by defaultHeadTailSize."

	| rowCount |
	rowCount := self defaultHeadTailSize.
	^ self head: rowCount
]

1245

DataFrame >> head: aNumber [
	"Answer the first aNumber rows of the receiver, or all its rows when it has fewer than aNumber"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame head: 2) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame head: 1) >>> (#(#(r1c1 r1c2)) asDataFrame)"

	| lastRowIndex |
	lastRowIndex := self numberOfRows min: aNumber.
	^ self rowsAt: (1 to: lastRowIndex)
]

1256

DataFrame >> indexOfColumnNamed: columnName [
	"Answer the index of the column named columnName, or signal an error when no column has that name"

	^ self
		indexOfColumnNamed: columnName
		ifAbsent: [
			"asString keeps the message buildable when the name is not a string (a number for instance); concatenating such a name directly fails before the intended error can be signalled"
			self error: 'Column ' , columnName asString , ' was not found' ]
]

1264

DataFrame >> indexOfColumnNamed: columnName ifAbsent: exceptionBlock [
	"Answer the position of columnName among the column names of the receiver, or the result of evaluating exceptionBlock when no column has that name"

	| names |
	names := self columnNames.
	^ names indexOf: columnName ifAbsent: exceptionBlock
]

1272

DataFrame >> indexOfRowNamed: rowName [
	"Answer the index of the row named rowName, or signal an error when no row has that name"

	^ self
		indexOfRowNamed: rowName
		ifAbsent: [
			"asString keeps the message buildable when the name is not a string (a number for instance); concatenating such a name directly fails before the intended error can be signalled"
			self error: 'Row ' , rowName asString , ' was not found' ]
]

1280

DataFrame >> indexOfRowNamed: rowName ifAbsent: exceptionBlock [
	"Answer the position of rowName among the row names of the receiver, or the result of evaluating exceptionBlock when no row has that name"

	| names |
	names := self rowNames.
	^ names indexOf: rowName ifAbsent: exceptionBlock
]

1288

DataFrame >> info [
	"Answer a string summarising the receiver: its number of entries, its number of columns and then, for every column, one line giving the column index, the column name, the number of non-nil values and the data type of the column"

	^ String streamContents: [ :out |
		"Header: size of the data frame"
		out nextPutAll: 'DataFrame: '.
		out print: self size.
		out nextPutAll: ' entries'; cr.
		out nextPutAll: 'Data columns (total '.
		out print: self columnNames size.
		out nextPutAll: ' columns):'; cr.
		out nextPutAll: ' # | Column | Non-nil count | Dtype'; cr.
		out nextPutAll: '---------------------------------------------------'; cr.

		"One line per column"
		self columnNames doWithIndex: [ :columnName :columnIndex |
			| nonNilCount |
			out print: columnIndex.
			out nextPutAll: ' | '.
			"String names are written as they are, any other name in its printed form"
			columnName isString
				ifTrue: [ out nextPutAll: columnName ]
				ifFalse: [ out print: columnName ].
			out nextPutAll: ' | '.
			nonNilCount := ((self columnAt: columnIndex) reject: #isNil) size.
			out print: nonNilCount.
			out nextPutAll: ' non-nil | '.
			out print: (self dataTypes at: columnName).
			out cr ] ]
]

1321

DataFrame >> initialize [
	"Set up an empty data frame: an empty dictionary of data types, a new DataFrameInternal as contents, default row and column names, and freshly calculated data types"

	super initialize.

	dataTypes := Dictionary new.
	contents := DataFrameInternal new.
	self
		setDefaultRowColumnNames;
		calculateDataTypes
]

1332

DataFrame >> initialize: aPoint [
	"Set up the receiver with a DataFrameInternal created from aPoint as contents, default row and column names, and freshly calculated data types. The class-side constructors of this file pass rows @ columns as aPoint."

	super initialize.

	contents := DataFrameInternal new: aPoint.
	self
		setDefaultRowColumnNames;
		calculateDataTypes
]

1342

DataFrame >> initializeColumns: anArrayOfArrays [
	"Set up the receiver with contents built from anArrayOfArrays, each element being taken as one column, then install default row and column names and calculate the data types"

	contents := DataFrameInternal withColumns: anArrayOfArrays.
	self
		setDefaultRowColumnNames;
		calculateDataTypes
]

1350

DataFrame >> initializeContents: aDataFrameInternal rowNames: rows columnNames: columns [
	"Set up the receiver with aDataFrameInternal as contents, rows as row names and columns as column names, then calculate the data types"

	super initialize.

	contents := aDataFrameInternal.
	self
		rowNames: rows;
		columnNames: columns;
		calculateDataTypes
]

1361

DataFrame >> initializeRows: anArrayOfArrays [
	"Set up the receiver with contents built from anArrayOfArrays, each element being taken as one row, then install default row and column names and calculate the data types"

	contents := DataFrameInternal withRows: anArrayOfArrays.
	self
		setDefaultRowColumnNames;
		calculateDataTypes
]

1369

DataFrame >> inject: thisValue into: binaryBlock [
	"Accumulate over the receiver as the inherited inject:into: does, then give the result its default name before answering it. The result is expected to understand name: and defaultName."

	| accumulated |
	accumulated := super inject: thisValue into: binaryBlock.
	accumulated name: accumulated defaultName.
	^ accumulated
]

1377

DataFrame >> innerJoin: aDataFrame [
	"Answer the inner join of the receiver with aDataFrame, using the row names as keys: the result has one row for every row name present in both data frames, made of the receiver's values followed by those of aDataFrame"

	| joined sharedRowNames |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).

	"select: is used instead of intersection: so that the order of the receiver's rows is preserved"
	sharedRowNames := self rowNames select: [ :each |
		aDataFrame rowNames includes: each ].

	sharedRowNames do: [ :each |
		joined
			addRow: (self row: each) asArray , (aDataFrame row: each) asArray
			named: each ].

	^ joined
]

1396

DataFrame >> innerJoin: aDataFrame on: aColumnName [
	"Answer the inner join of the receiver with aDataFrame on the column that is named aColumnName in both data frames"

	^ self
		innerJoin: aDataFrame
		onLeft: aColumnName
		onRight: aColumnName
]

1402

DataFrame >> innerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [
	"Answer the inner join of the receiver with aDataFrame. A row of the receiver is paired with every row of aDataFrame whose value in rightColumn equals the receiver's value in leftColumn.
	 rowNames are not preserved: the rows of the result are numbered from 1.
	 Duplicate column names will be appended with '_x' and '_y'."

	| matchingRowIndices joined |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).

	"The join is skipped when one of the data frames has no rows"
	(self size isZero | aDataFrame size isZero) ifFalse: [
		"Rows are collected in the order of the receiver"
		matchingRowIndices := OrderedCollection new.
		(self column: leftColumn) withIndexDo: [ :key :index |
			((aDataFrame column: rightColumn) includes: key) ifTrue: [
				matchingRowIndices add: index ] ].

		matchingRowIndices do: [ :leftIndex |
			| key rightIndices |
			key := self at: leftIndex at: (self indexOfColumnNamed: leftColumn).
			rightIndices := aDataFrame findAllIndicesOf: key atColumn: rightColumn.
			rightIndices do: [ :rightIndex |
				joined
					addRow: (self rowAt: leftIndex) asArray , (aDataFrame rowAt: rightIndex) asArray
					named: joined size + 1 ] ] ].

	"When both key columns have the same name they hold the same values: keep a single one, under its original name"
	leftColumn = rightColumn ifTrue: [
		joined removeColumn: rightColumn , '_y'.
		joined renameColumn: leftColumn , '_x' to: leftColumn ].

	^ joined
]

1465

DataFrame >> interquartileRange [
	"Answer the interquartile range of every column, as computed by applyToAllColumns:. The interquartile range is the difference between the third quartile and the first quartile."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame interquartileRange) >>> (Dictionary newFrom: {(1 -> 20).(2 -> 2)})"

	^ self applyToAllColumns: #interquartileRange
]

1474

DataFrame >> leftJoin: aDataFrame [
	"Answer the left join of the receiver with aDataFrame, using the row names as keys: every row of the receiver is kept and completed with the values of the row of aDataFrame that has the same name, or with nils when aDataFrame has no such row"

	| joined sharedRowNames |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).
	sharedRowNames := self rowNames intersection: aDataFrame rowNames.

	self rowNames do: [ :each |
		| leftPart rightPart |
		leftPart := (self row: each) asArray.
		rightPart := (sharedRowNames includes: each)
			ifTrue: [ (aDataFrame row: each) asArray ]
			ifFalse: [ Array new: aDataFrame columnNames size ].
		joined addRow: leftPart , rightPart named: each ].

	^ joined
]

1492

DataFrame >> leftJoin: aDataFrame on: aColumnName [
	"Answer the left join of the receiver with aDataFrame on the column that is named aColumnName in both data frames"

	^ self
		leftJoin: aDataFrame
		onLeft: aColumnName
		onRight: aColumnName
]

1498

DataFrame >> leftJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [
	"Answer the left join of the receiver with aDataFrame. A row of the receiver is paired with every row of aDataFrame whose value in rightColumn equals the receiver's value in leftColumn; a row of the receiver without such a match is kept and completed with nils.
	 rowNames are not preserved: the rows of the result are numbered from 1.
	 Duplicate column names will be appended with '_x' and '_y'."

	| joined sharedKeys |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).

	sharedKeys := (self column: leftColumn) asArray intersection: (aDataFrame column: rightColumn) asArray.

	1 to: self size do: [ :leftIndex |
		| key |
		key := self at: leftIndex at: (self indexOfColumnNamed: leftColumn).
		(sharedKeys includes: key)
			ifTrue: [
				"Key present in both data frames: one result row per matching row of aDataFrame"
				(aDataFrame findAllIndicesOf: key atColumn: rightColumn) do: [ :rightIndex |
					joined
						addRow: (self rowAt: leftIndex) asArray , (aDataFrame rowAt: rightIndex) asArray
						named: joined size + 1 ] ]
			ifFalse: [
				"Key present in the receiver only: the right part of the row is filled with nils"
				joined
					addRow: (self rowAt: leftIndex) asArray , (Array new: aDataFrame columnNames size)
					named: joined size + 1 ] ].

	"When both key columns have the same name, keep a single one, under its original name"
	leftColumn = rightColumn ifTrue: [
		joined removeColumn: rightColumn , '_y'.
		joined renameColumn: leftColumn , '_x' to: leftColumn ].

	^ joined
]

1537

DataFrame >> max [
	"Answer the maximum of every column, as computed by applyToAllColumns:. The maximum is the largest value present in a set of values."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame max) >>> (Dictionary newFrom: {(1 -> 30).(2 -> 3)})"

	^ self applyToAllColumns: #max
]

1546

DataFrame >> median [
	"Answer the median of every column, as computed by applyToAllColumns:. The median of a set of values is its middle value once the set is sorted in increasing order: 50% of the values are smaller than or equal to it."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame median) >>> (Dictionary newFrom: {(1 -> 20).(2 -> 2)})"

	^ self applyToAllColumns: #median
]

1555

DataFrame >> min [
	"Answer the minimum of every column, as computed by applyToAllColumns:. The minimum is the smallest value present in a set of values."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame min) >>> (Dictionary newFrom: {(1 -> 10).(2 -> 1)})"

	^ self applyToAllColumns: #min
]

1564

DataFrame >> mode [
	"Answer the mode of every column, as computed by applyToAllColumns:. The mode of a set of values is the value that appears most often in it."

	"(#(#(10 3) #(10 1) #(30 3)) asDataFrame mode) >>> (Dictionary newFrom: {(1 -> 10).(2 -> 3)})"

	^ self applyToAllColumns: #mode
]

1587

DataFrame >> numberOfColumns [
	"Answer how many columns the receiver has, as reported by its contents"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame numberOfColumns) >>> 2 "

	"(#(#(1 2 3) #(4 5 6)) asDataFrame numberOfColumns) >>> 3 "

	^ contents numberOfColumns
]

1598

DataFrame >> numberOfNils [
	"Answer a dictionary that associates each column name with the number of nil values found in that column"

	"(#(#(nil 2) #(nil 4)) asDataFrame numberOfNils) >>> (Dictionary newFrom: {(1 -> 2).(2 -> 0)})"

	"(#(#('nil' 'nil') #('nil' 'nil')) asDataFrame numberOfNils) >>> (Dictionary newFrom: {(1 -> 0).(2 -> 0)})"

	"(#(#(nil 'nil') #('nil' 'nil')) asDataFrame numberOfNils) >>> (Dictionary newFrom: {(1 -> 1).(2 -> 0)})"

	| nilCounts |
	nilCounts := Dictionary new.
	self columnNames do: [ :columnName |
		| nilCount |
		nilCount := (self column: columnName) count: [ :value | value isNil ].
		nilCounts at: columnName put: nilCount ].
	^ nilCounts
]

1616

DataFrame >> numberOfRows [
	"Answer how many rows the receiver has, as reported by its contents"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame numberOfRows) >>> 3 "

	"(#(#(1 2 3) #(4 5 6)) asDataFrame numberOfRows) >>> 2 "

	^ contents numberOfRows
]

1627

DataFrame >> numericalColumnNames [
	"Answer the names of the columns whose recorded data type is Number or one of its subclasses"

	^ self columnNames select: [ :each |
		| dataType |
		dataType := self dataTypes at: each.
		dataType includesBehavior: Number ]
]

1635

DataFrame >> numericalColumns [
	"Answer the columns whose recorded data type is Number or one of its subclasses. The data type of a column is looked up through the name of the column."

	^ self columns select: [ :each |
		| dataType |
		dataType := self dataTypes at: each name.
		dataType includesBehavior: Number ]
]

1643

DataFrame >> outerJoin: aDataFrame [
	"Answer the outer join of the receiver with aDataFrame, using the row names as keys. The rows of the receiver come first, each completed with the values of the row of aDataFrame that has the same name or with nils. They are followed by the rows found only in aDataFrame, whose left part is filled with nils."

	| joined sharedRowNames |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).
	sharedRowNames := self rowNames intersection: aDataFrame rowNames.

	self rowNames do: [ :each |
		| leftPart rightPart |
		leftPart := (self row: each) asArray.
		rightPart := (sharedRowNames includes: each)
			ifTrue: [ (aDataFrame row: each) asArray ]
			ifFalse: [ Array new: aDataFrame columnNames size ].
		joined addRow: leftPart , rightPart named: each ].

	"Rows whose name exists in aDataFrame only"
	aDataFrame rowNames do: [ :each |
		(sharedRowNames includes: each) ifFalse: [
			| leftPart |
			leftPart := Array new: self columnNames size.
			joined addRow: leftPart , (aDataFrame row: each) asArray named: each ] ].

	^ joined
]

1667

DataFrame >> outerJoin: aDataFrame on: aColumnName [
	"Outer join of the receiver with aDataFrame where the key column is called aColumnName on both sides. Delegates to outerJoin:onLeft:onRight: with the same name for both columns"

	^ self
		outerJoin: aDataFrame
		onLeft: aColumnName
		onRight: aColumnName
]

5✔

1670

5✔

1673

DataFrame >> outerJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [
	"Answer a new data frame holding the outer join of the receiver and aDataFrame. Rows are matched by comparing the values of the receiver's column leftColumn with the values of aDataFrame's column rightColumn.
	 rowNames are not preserved: each added row is named with the size of the result plus one.
	 Duplicate column names will be appended with '_x' and '_y'."

	| joined sharedKeys leftKeyIndex rightKeyIndex |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).
	sharedKeys := (self column: leftColumn) asArray intersection: (aDataFrame column: rightColumn) asArray.
	leftKeyIndex := self indexOfColumnNamed: leftColumn.
	rightKeyIndex := aDataFrame indexOfColumnNamed: rightColumn.

	1 to: self size do: [ :leftIndex |
		| key |
		key := self at: leftIndex at: leftKeyIndex.
		(sharedKeys includes: key)
			ifFalse: [
				"Key found on the left only - pad the right side with nils"
				joined
					addRow: (self rowAt: leftIndex) asArray , (Array new: aDataFrame columnNames size)
					named: joined size + 1 ]
			ifTrue: [
				"Key found on both sides - one output row per matching row of aDataFrame"
				(aDataFrame findAllIndicesOf: key atColumn: rightColumn) do: [ :rightIndex |
					joined
						addRow: (self rowAt: leftIndex) asArray , (aDataFrame rowAt: rightIndex) asArray
						named: joined size + 1 ] ] ].

	1 to: aDataFrame size do: [ :rightIndex |
		| key |
		key := aDataFrame at: rightIndex at: rightKeyIndex.
		(sharedKeys includes: key) ifFalse: [
			| leftHalf |
			"Key found on the right only - the left side is nil, except for a receiver column named rightColumn, which receives the key value (ifTrue: alone answers nil otherwise)"
			leftHalf := self columnNames collect: [ :name |
				name = rightColumn ifTrue: [ (aDataFrame rowAt: rightIndex) at: rightColumn ] ].
			joined
				addRow: leftHalf , (aDataFrame rowAt: rightIndex) asArray
				named: joined size + 1 ] ].

	"When both key columns share one name, keep a single key column under that name"
	leftColumn = rightColumn ifTrue: [
		joined
			removeColumn: rightColumn , '_y';
			renameColumn: leftColumn , '_x' to: leftColumn ].

	^ joined
]

5✔

1723

5✔

1726

DataFrame >> postCopy [
	"After a copy, give the new instance its own copies of the contents, the row names, the column names and the data types so that they are not shared with the original"

	super postCopy.
	contents := contents copy.
	rowNames := rowNames copy.
	columnNames := columnNames copy.
	dataTypes := dataTypes copy
]

5✔

1733

5✔

1736

DataFrame >> printOn: aStream [
	"Write on aStream the class name preceded by the article 'an' or 'a' (depending on whether the name starts with a vowel), a space, and the dimensions of the receiver as a string"

	| className article |
	className := self class name.
	article := className first isVowel
		ifTrue: [ 'an ' ]
		ifFalse: [ 'a ' ].
	aStream
		nextPutAll: article;
		nextPutAll: className;
		space;
		nextPutAll: self dimensions asString
]

5✔

1745

5✔

1748

DataFrame >> privateRowNames: anArray [
	"Private. Store anArray, converted to an OrderedCollection, as the row names without running the checks done by rowNames:. Meant for internal code that already knows the names are valid"

	rowNames := anArray asOrderedCollection
]

5✔

1752

5✔

1755

DataFrame >> range [
	"Range is the difference between the highest and the lowest value of a set. Answer the result of applying #range to all columns of the receiver"

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame range) >>> (Dictionary newFrom: {(1 -> 20).(2 -> 2)})"

	^ self applyToAllColumns: #range
]

5✔

1761

5✔

1764

DataFrame >> removeColumn: columnName [
	"Remove the column called columnName from the receiver. The name is first resolved to a column index, then removeColumnAt: does the work"

	self removeColumnAt: (self indexOfColumnNamed: columnName)
]

5✔

1770

5✔

1773

DataFrame >> removeColumnAt: columnNumber [
	"Remove the column found at index columnNumber, together with its data type entry and its name. Signal SubscriptOutOfBounds when columnNumber is smaller than 1 or greater than the number of columns"

	"(#(#(1 2) #(3 4)) asDataFrame removeColumnAt: 2) >>> (#(#(1) #(3)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame removeColumnAt: 2) >>> (#(#(r1c1) #(r2c1)) asDataFrame)"

	| removedName |
	(columnNumber < 1 or: [ columnNumber > self numberOfColumns ]) ifTrue: [
		SubscriptOutOfBounds signalFor: columnNumber ].

	"The name must be read before the column disappears from the contents"
	removedName := (self columnAt: columnNumber) name.
	self dataTypes removeKey: removedName.

	contents removeColumnAt: columnNumber.
	columnNames := columnNames copyWithoutIndex: columnNumber
]

5✔

1787

5✔

1790

DataFrame >> removeColumns: aCollectionOfColumnNames [
	"Remove from the receiver every column whose name is an element of aCollectionOfColumnNames, one removeColumn: at a time"

	aCollectionOfColumnNames do: [ :columnName |
		self removeColumn: columnName ]
]

5✔

1795

5✔

1796

5✔

1799

DataFrame >> removeColumnsAt: aCollectionOfColumnIndices [
	"Remove from the receiver every column whose index is an element of aCollectionOfColumnIndices. The indices are translated to column names up front, because indices shift while columns are being removed"

	"(#(#(1 2 3) #(4 5 6)) asDataFrame removeColumnsAt: #(2 3)) >>> (#(#(1) #(4)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame removeColumnsAt: #(1 2)) >>> (#(#() #()) asDataFrame)"

	self removeColumns:
		(aCollectionOfColumnIndices collect: [ :index | columnNames at: index ])
]

5✔

1810

5✔

1813

DataFrame >> removeColumnsOfRowElementsSatisfing: aBlock onRowNamed: rowName [
	"Remove the columns whose element in the row called rowName satisfies aBlock. The row name is resolved to an index and the work is handed to removeColumnsOfRowElementsSatisfying:onRow:"

	self
		removeColumnsOfRowElementsSatisfying: aBlock
		onRow: (self indexOfRowNamed: rowName)
]

5✔

1819

5✔

1822

DataFrame >> removeColumnsOfRowElementsSatisfying: aBlock onRow: rowNumber [
	"Remove the columns whose element at row index rowNumber satisfies aBlock.
	 Signal SubscriptOutOfBounds when rowNumber is smaller than 1 or greater than the number of rows.
	 The data type entries of the removed columns are dropped too, so that dataTypes stays in step with columnNames as it does in removeColumnAt:.
	 When no column is left afterwards, the row names are emptied as well"

	"(#(#(1 2 3) #(4 5 6)) asDataFrame removeColumnsOfRowElementsSatisfying: [ :x | x > 4 ] onRow: 2) >>> (#(#(1) #(4)) asDataFrame)"

	| previousColumnNames |
	(rowNumber < 1 or: [ rowNumber > self numberOfRows ]) ifTrue: [
		SubscriptOutOfBounds signalFor: rowNumber ].

	"Rebuild columnNames in place, keeping only the names of the columns that survive"
	previousColumnNames := columnNames deepCopy.
	columnNames removeAll.
	previousColumnNames withIndexDo: [ :columnName :j |
		(aBlock value: (contents at: rowNumber at: j))
			ifTrue: [
				"Forget the data type of a removed column; tolerate a missing entry"
				dataTypes ifNotNil: [ :types | types removeKey: columnName ifAbsent: [  ] ] ]
			ifFalse: [ columnNames add: columnName ] ].
	contents
		removeColumnsOfRowElementsSatisfying: aBlock
		onRow: rowNumber.

	self numberOfColumns = 0 ifTrue: [ rowNames removeAll ]
]

5✔

1841

5✔

1844

DataFrame >> removeColumnsWithNilsAtRow: rowNumber [
	"Remove from the receiver every column that holds nil at row index rowNumber"

	"(#(#(nil 2) #(3 nil)) asDataFrame removeColumnsWithNilsAtRow: 2) >>> (#(#(nil) #(3)) asDataFrame)"

	"(#(#(nil r1c2) #(r2c1 nil)) asDataFrame removeColumnsWithNilsAtRow: 2) >>> (#(#(nil) #(r2c1)) asDataFrame)"

	| holdsNil |
	holdsNil := [ :element | element isNil ].
	self removeColumnsOfRowElementsSatisfying: holdsNil onRow: rowNumber
]

5✔

1854

5✔

1857

DataFrame >> removeColumnsWithNilsAtRowNamed: rowName [
	"Remove from the receiver every column that holds nil in the row called rowName"

	| holdsNil |
	holdsNil := [ :element | element isNil ].
	self removeColumnsOfRowElementsSatisfing: holdsNil onRowNamed: rowName
]

5✔

1861

5✔

1864

DataFrame >> removeDuplicatedRows [
	"Remove the rows whose values equal the values of an earlier row, so that only the first occurrence of each row is kept. Answer the receiver.
	 The rows are visited once; the values already seen are remembered in a Set instead of comparing every pair of rows"

	"(#(#(1 2) #(3 4) #(1 2)) asDataFrame removeDuplicatedRows) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	"(#(#(r1c1) #(r2c1) #(r2c1) #(r2c1)) asDataFrame removeDuplicatedRows) >>> (#(#(r1c1) #(r2c1)) asDataFrame)"

	| seenValues duplicatedIndices |
	seenValues := Set new.
	duplicatedIndices := Set new.
	1 to: self numberOfRows do: [ :index |
		| values |
		values := (self rowAt: index) values.
		(seenValues includes: values)
			ifTrue: [ duplicatedIndices add: index ]
			ifFalse: [ seenValues add: values ] ].
	^ self removeRowsAt: duplicatedIndices
]

5✔

1881

5✔

1884

DataFrame >> removeRow: rowName [
	"Remove the row called rowName from the receiver. The name is first resolved to a row index, then removeRowAt: does the work"

	self removeRowAt: (self indexOfRowNamed: rowName)
]

5✔

1890

5✔

1893

DataFrame >> removeRowAt: rowNumber [
	"Remove the row found at index rowNumber together with its name. Signal SubscriptOutOfBounds when rowNumber is smaller than 1 or greater than the number of rows"

	"(#(#(1 2) #(3 4)) asDataFrame removeRowAt: 2) >>> (#(#(1 2)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame removeRowAt: 2) >>> (#(#(r1c1 r1c2)) asDataFrame)"

	| outOfBounds |
	outOfBounds := rowNumber < 1 or: [ rowNumber > self numberOfRows ].
	outOfBounds ifTrue: [ SubscriptOutOfBounds signalFor: rowNumber ].

	contents removeRowAt: rowNumber.
	rowNames := rowNames copyWithoutIndex: rowNumber
]

5✔

1905

5✔

1908

DataFrame >> removeRows: aCollectionOfRowNames [
	"Remove from the receiver every row whose name is an element of aCollectionOfRowNames, one removeRow: at a time"

	aCollectionOfRowNames do: [ :rowName | self removeRow: rowName ]
]

5✔

1913

5✔

1916

DataFrame >> removeRowsAt: aCollectionOfRowIndices [
	"Remove from the receiver every row whose index is an element of aCollectionOfRowIndices. The indices are translated to row names up front, because indices shift while rows are being removed"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame removeRowsAt: #(2 3)) >>> (#(#(1 2)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame removeRowsAt: #(2)) >>> (#(#(r1c1 r1c2)) asDataFrame)"

	self removeRows:
		(aCollectionOfRowIndices collect: [ :index | rowNames at: index ])
]

5✔

1927

5✔

1930

DataFrame >> removeRowsWhereElementsInColumnAt: columnNumber satisfy: aBlock [
	"Remove the rows whose element at column index columnNumber satisfies aBlock.
	 Signal SubscriptOutOfBounds when columnNumber is smaller than 1 or greater than the number of columns.
	 When no row is left afterwards, the column names are emptied as well"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame removeRowsWhereElementsInColumnAt: 2 satisfy: [ :x | x >= 4 ]) >>> (#(#(1 2)) asDataFrame)"

	| previousRowNames |
	(columnNumber < 1 or: [ columnNumber > self numberOfColumns ]) ifTrue: [
		SubscriptOutOfBounds signalFor: columnNumber ].

	"Rebuild rowNames in place, keeping only the names of the rows that survive"
	previousRowNames := rowNames deepCopy.
	rowNames removeAll.
	previousRowNames withIndexDo: [ :name :rowIndex |
		| element |
		element := contents at: rowIndex at: columnNumber.
		(aBlock value: element) ifFalse: [ rowNames add: name ] ].

	contents removeRowsWhereElementsInColumnAt: columnNumber satisfy: aBlock.

	self numberOfRows = 0 ifTrue: [ columnNames removeAll ]
]

5✔

1949

5✔

1952

DataFrame >> removeRowsWhereElementsInColumnNamed: columnName satisfy: aBlock [
	"Remove the rows whose element in the column called columnName satisfies aBlock. The column name is resolved to an index and the work is handed to removeRowsWhereElementsInColumnAt:satisfy:"

	self
		removeRowsWhereElementsInColumnAt: (self indexOfColumnNamed: columnName)
		satisfy: aBlock
]

5✔

1958

5✔

1961

DataFrame >> removeRowsWithNils [
	"Remove from the receiver every row that has at least one nil value. Columns are processed one after the other; for each of them the rows holding nil in that column are removed"

	"(#(#(1 2) #(nil 4) #(5 nil)) asDataFrame removeRowsWithNils) >>> (#(#(1 2)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(nil r2c2)) asDataFrame removeRowsWithNils) >>> (#(#(r1c1 r1c2)) asDataFrame)"

	1 to: self numberOfColumns do: [ :columnIndex |
		self
			removeRowsWhereElementsInColumnAt: columnIndex
			satisfy: [ :element | element isNil ] ]
]

5✔

1972

5✔

1975

DataFrame >> removeRowsWithNilsAtColumn: columnNumber [
	"Remove from the receiver every row that holds nil at column index columnNumber"

	"(#(#(nil 2) #(3 nil)) asDataFrame removeRowsWithNilsAtColumn: 2) >>> (#(#(nil 2)) asDataFrame)"

	"(#(#(nil r1c2) #(r2c1 nil)) asDataFrame removeRowsWithNilsAtColumn: 2) >>> (#(#(nil r1c2)) asDataFrame)"

	| holdsNil |
	holdsNil := [ :element | element isNil ].
	self removeRowsWhereElementsInColumnAt: columnNumber satisfy: holdsNil
]

5✔

1985

5✔

1988

DataFrame >> removeRowsWithNilsAtColumnNamed: columnName [
	"Remove from the receiver every row that holds nil in the column called columnName"

	| holdsNil |
	holdsNil := [ :element | element isNil ].
	self removeRowsWhereElementsInColumnNamed: columnName satisfy: holdsNil
]

5✔

1994

5✔

1997

DataFrame >> renameColumn: oldName to: newName [
	"Find the column called oldName and rename it to newName, moving its data type entry to the new name.
	 The lookup of oldName happens first, so whatever indexOfColumnNamed: does for an unknown name still applies.
	 Renaming a column to the name it already has leaves the receiver untouched; without that guard the data type entry would be written and then removed under the same key, and so be lost"

	| index |
	index := self indexOfColumnNamed: oldName.
	oldName = newName ifTrue: [ ^ self ].

	self columnNames at: index put: newName.

	self dataTypes at: newName put: (self dataTypes at: oldName).
	self dataTypes removeKey: oldName
]

5✔

2005

5✔

2008

DataFrame >> renameRow: oldName to: newName [
	"Find the row called oldName and store newName at its position in the row names"

	self rowNames
		at: (self indexOfRowNamed: oldName)
		put: newName
]

5✔

2013

5✔

2024

DataFrame >> replaceNilsWith: anObject [
	"Store anObject in every cell of the receiver that currently holds nil. Cells are visited column by column"

	"(#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWith: 5) >>> (#(#(5 2) #(3 5)) asDataFrame)"

	"(#(#('nil' 'nil') #('nil' 'nil')) asDataFrame replaceNilsWith: 5) >>> (#(#('nil' 'nil') #('nil' 'nil')) asDataFrame)"

	"(#(#(nil 'nil') #('nil' 'nil')) asDataFrame replaceNilsWith: 5) >>> (#(#(5 'nil') #('nil' 'nil')) asDataFrame)"

	1 to: self numberOfColumns do: [ :column |
		1 to: self numberOfRows do: [ :row |
			(self at: row at: column) isNil ifTrue: [
				self at: row at: column put: anObject ] ] ]
]

5✔

2037

5✔

2040

DataFrame >> replaceNilsWithAverage [
	"Replace every nil of the receiver with the average of the non-nil values of the column it belongs to.
	 The average of a column is computed only when a nil is actually met in it, so columns without nils (for instance non-numerical ones) are left alone instead of being asked for an average they may not support. The computation happens before the first replacement in the column, hence only original values take part in it"

	"(#(#(nil 2) #(3 nil) #(5 6)) asDataFrame replaceNilsWithAverage) >>> (#(#(4 2) #(3 4) #(5 6)) asDataFrame)"

	"(#(#(1 2) #(3 4)) asDataFrame replaceNilsWithAverage) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	| averageOfColumn |
	1 to: self numberOfColumns do: [ :i |
		"Start each column without a cached average"
		averageOfColumn := nil.
		1 to: self numberOfRows do: [ :j |
			(self at: j at: i) ifNil: [
				averageOfColumn ifNil: [
					averageOfColumn := ((self columnAt: i) select: [ :ele |
						                    ele isNotNil ]) average ].
				self at: j at: i put: averageOfColumn ] ] ]
]

5✔

2053

5✔

2072

DataFrame >> replaceNilsWithMode [
	"Replace every nil of the receiver with the mode of the non-nil values of the column it belongs to. The mode of a column is computed when the first nil of that column is met and reused for the rest of the column"

	"(#(#(nil 2) #(3 nil) #(3 2)) asDataFrame replaceNilsWithMode) >>> (#(#(3 2) #(3 2) #(3 2)) asDataFrame)"

	"(#(#(1 2) #(3 4)) asDataFrame replaceNilsWithMode) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	1 to: self numberOfColumns do: [ :columnIndex |
		| cachedMode |
		"Explicit reset: the cached mode of one column must never leak into the next one"
		cachedMode := nil.
		1 to: self numberOfRows do: [ :rowIndex |
			(self at: rowIndex at: columnIndex) isNil ifTrue: [
				cachedMode ifNil: [
					cachedMode := ((self columnAt: columnIndex) reject: [ :each | each isNil ]) mode ].
				self at: rowIndex at: columnIndex put: cachedMode ] ] ]
]

5✔

2087

5✔

2090

DataFrame >> replaceNilsWithNextRowValue [
	"Replace every nil of the receiver with the next non-nil value found below it in the same column. A nil that has no non-nil value after it stays nil"

	"(#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWithNextRowValue) >>> (#(#(3 2) #(3 nil)) asDataFrame)"

	"(#(#(1 2) #(3 4)) asDataFrame replaceNilsWithNextRowValue) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	| lastRow carried |
	lastRow := self numberOfRows.
	1 to: self numberOfColumns do: [ :columnIndex |
		"Walk each column bottom-up, carrying the value of the row below"
		lastRow to: 1 by: -1 do: [ :rowIndex |
			(rowIndex < lastRow and: [ (self at: rowIndex at: columnIndex) isNil ]) ifTrue: [
				self at: rowIndex at: columnIndex put: carried ].
			carried := self at: rowIndex at: columnIndex ] ]
]

5✔

2104

5✔

2107

DataFrame >> replaceNilsWithPreviousRowValue [
	"Replace every nil of the receiver with the previous non-nil value found above it in the same column. A nil that has no non-nil value before it stays nil"

	"(#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWithPreviousRowValue) >>> (#(#(nil 2) #(3 2)) asDataFrame)"

	"(#(#(1 2) #(3 4)) asDataFrame replaceNilsWithPreviousRowValue) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	| carried |
	1 to: self numberOfColumns do: [ :columnIndex |
		"Walk each column top-down, carrying the value of the row above"
		1 to: self numberOfRows do: [ :rowIndex |
			(rowIndex > 1 and: [ (self at: rowIndex at: columnIndex) isNil ]) ifTrue: [
				self at: rowIndex at: columnIndex put: carried ].
			carried := self at: rowIndex at: columnIndex ] ]
]

5✔

2120

5✔

2123

DataFrame >> replaceNilsWithZero [
	"Store 0 in every cell of the receiver that currently holds nil"

	"(#(#(nil 2) #(3 nil)) asDataFrame replaceNilsWithZero) >>> (#(#(0 2) #(3 0)) asDataFrame)"

	"(#(#(1 2) #(3 4)) asDataFrame replaceNilsWithZero) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	self replaceNilsWith: 0
]

5✔

2131

5✔

2134

DataFrame >> rightJoin: aDataFrame [
	"Answer a new data frame holding the right join of the receiver and aDataFrame, with row names used as keys. Every row of aDataFrame is kept; its left half is the matching row of the receiver, or nils when the receiver has no row with that name"

	| joined sharedNames |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).
	sharedNames := self rowNames intersection: aDataFrame rowNames.

	aDataFrame rowNames do: [ :name |
		| leftHalf |
		leftHalf := (sharedNames includes: name)
			ifTrue: [ (self row: name) asArray ]
			ifFalse: [ Array new: self columnNames size ].
		joined addRow: leftHalf , (aDataFrame row: name) asArray named: name ].

	^ joined
]

5✔

2150

5✔

2153

DataFrame >> rightJoin: aDataFrame on: aColumnName [
	"Right join of the receiver with aDataFrame where the key column is called aColumnName on both sides. Delegates to rightJoin:onLeft:onRight: with the same name for both columns"

	^ self
		rightJoin: aDataFrame
		onLeft: aColumnName
		onRight: aColumnName
]

5✔

2156

5✔

2159

DataFrame >> rightJoin: aDataFrame onLeft: leftColumn onRight: rightColumn [
	"Answer a new data frame holding the right join of the receiver and aDataFrame. Rows are matched by comparing the values of the receiver's column leftColumn with the values of aDataFrame's column rightColumn; every row of aDataFrame appears in the result.
	 rowNames are not preserved: each added row is named with the size of the result plus one.
	 Duplicate column names will be appended with '_x' and '_y'."

	| joined sharedKeys rightKeyIndex |
	joined := self class withColumnNames: (self getJointColumnsWith: aDataFrame).
	sharedKeys := (self column: leftColumn) asArray intersection: (aDataFrame column: rightColumn) asArray.
	rightKeyIndex := aDataFrame indexOfColumnNamed: rightColumn.

	1 to: aDataFrame size do: [ :rightIndex |
		| key |
		key := aDataFrame at: rightIndex at: rightKeyIndex.
		(sharedKeys includes: key)
			ifTrue: [
				"Key found on both sides - one output row per matching row of the receiver"
				(self findAllIndicesOf: key atColumn: leftColumn) do: [ :leftIndex |
					joined
						addRow: (self rowAt: leftIndex) asArray , (aDataFrame rowAt: rightIndex) asArray
						named: joined size + 1 ] ]
			ifFalse: [
				| leftHalf |
				"Key found on the right only - the left side is nil, except for a receiver column named rightColumn, which receives the key value (ifTrue: alone answers nil otherwise)"
				leftHalf := self columnNames collect: [ :name |
					name = rightColumn ifTrue: [ (aDataFrame rowAt: rightIndex) at: rightColumn ] ].
				joined
					addRow: leftHalf , (aDataFrame rowAt: rightIndex) asArray
					named: joined size + 1 ] ].

	"When both key columns share one name, keep a single key column under that name"
	leftColumn = rightColumn ifTrue: [
		joined
			removeColumn: rightColumn , '_y';
			renameColumn: leftColumn , '_x' to: leftColumn ].

	^ joined
]

5✔

2199

5✔

2202

DataFrame >> row: rowName [
	"Answer the row called rowName as a DataSeries. An unknown name is handled by indexOfRowNamed:, which is expected to signal an exception"

	^ self rowAt: (self indexOfRowNamed: rowName)
]

5✔

2207

5✔

2210

DataFrame >> row: rowName ifAbsent: exceptionBlock [
	"Answer the row called rowName as a DataSeries, or the value of exceptionBlock when no row has that name"

	| position |
	position := self indexOfRowNamed: rowName ifAbsent: [ ^ exceptionBlock value ].
	^ self rowAt: position
]

5✔

2218

5✔

2221

DataFrame >> row: rowName put: anArray [
	"Replace the values of the row called rowName with the elements of anArray and answer what rowAt:put: answers. An unknown name is handled by indexOfRowNamed:, which is expected to signal an exception"

	^ self rowAt: (self indexOfRowNamed: rowName) put: anArray
]

5✔

2226

5✔

2229

DataFrame >> row: rowName put: anArray ifAbsent: exceptionBlock [
	"Replace the values of the row called rowName with the elements of anArray, or answer the value of exceptionBlock when no row has that name"

	| position |
	position := self indexOfRowNamed: rowName ifAbsent: [ ^ exceptionBlock value ].
	^ self rowAt: position put: anArray
]

5✔

2237

5✔

2240

DataFrame >> row: rowName transform: aBlock [
	"Evaluate aBlock with the row called rowName and store the result, converted to an Array, back as that row. An unknown name is handled by row:, which is expected to signal an exception"

	| transformed |
	transformed := aBlock value: (self row: rowName).
	self row: rowName put: transformed asArray
]

5✔

2245

5✔

2248

DataFrame >> row: rowName transform: aBlock ifAbsent: exceptionBlock [
	"Evaluate aBlock with the row called rowName and store the result back as that row. Answer the value of exceptionBlock when no row has that name.
	 The result of aBlock is converted with asArray before being stored, exactly as row:transform: and rowAt:transform: do, so that a block answering a DataSeries is handled the same way by all three methods"

	| row |
	row := self row: rowName ifAbsent: [ ^ exceptionBlock value ].
	self row: rowName put: (aBlock value: row) asArray
]

5✔

2253

5✔

2256

DataFrame >> rowAt: aNumber [
	"Answer the row found at index aNumber as a DataSeries whose name is the row name and whose keys are the column names of the receiver"

	"(#(#(1 2) #(5 6)) asDataFrame rowAt: 2) >>> (#(5 6) asDataSeries) "

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame rowAt: 2) >>> (#(r2c1 r2c2) asDataSeries) "

	^ (contents rowAt: aNumber) asDataSeries
		  name: (self rowNames at: aNumber);
		  keys: self columnNames;
		  yourself
]

5✔

2268

5✔

2271

DataFrame >> rowAt: aNumber put: anArray [
	"Replace the values of the row found at index aNumber with the elements of anArray. Signal SizeMismatch when anArray does not have one element per column"

	"(#(#(1 2) #(3 4)) asDataFrame rowAt: 2 put: #(5 6)) >>> (#(#(1 2) #(5 6)) asDataFrame) "

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame rowAt: 2 put: #(R2C1 R2C2)) >>> (#(#(r1c1 r1c2) #(R2C1 R2C2)) asDataFrame) "

	| fitsColumns |
	fitsColumns := anArray size = self numberOfColumns.
	fitsColumns ifFalse: [ SizeMismatch signal ].

	contents rowAt: aNumber put: anArray
]

5✔

2281

5✔

2284

DataFrame >> rowAt: aNumber transform: aBlock [
	"Evaluate aBlock with the row found at index aNumber and store the result, converted to an Array, back as that row"

	"(#(#(1 2) #(3 4)) asDataFrame rowAt: 2 transform: [ :x | x + 1 ]) >>> (#(#(1 2) #(4 5)) asDataFrame) "

	| transformed |
	transformed := aBlock value: (self rowAt: aNumber).
	self rowAt: aNumber put: transformed asArray
]

5✔

2292

5✔

2295

DataFrame >> rowNames [
	"Answer the collection holding the row names of the receiver. It is the stored collection itself, not a copy"

	^ rowNames
]

5✔

2299

5✔

2302

DataFrame >> rowNames: anArray [
	"Set the row names of the receiver to the elements of anArray.
	 Signal SizeMismatch when anArray does not have one name per row, and Error when two names are equal"

	| hasOneNamePerRow allDistinct |
	hasOneNamePerRow := anArray size = self numberOfRows.
	hasOneNamePerRow ifFalse: [ SizeMismatch signal: 'Wrong number of row names' ].

	"A Set drops equal elements, so a smaller size reveals duplicates"
	allDistinct := anArray asSet size = anArray size.
	allDistinct ifFalse: [ Error signal: 'All row names must be distinct' ].

	self privateRowNames: anArray
]

5✔

2310

5✔

2313

DataFrame >> rows [
	"Answer a collection with every row of the receiver, in order, each one obtained with rowAt:"

	"(#(#(1 2) #(3 4)) asDataFrame rows) >>> (#( #(1 2) #(3 4) ) collect: #asDataSeries) "

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame rows) >>> (#( #(r1c1 r1c2) #(r2c1 r2c2) ) collect: #asDataSeries) "

	| rowIndices |
	rowIndices := 1 to: self numberOfRows.
	^ rowIndices collect: [ :index | self rowAt: index ]
]

5✔

2321

5✔

2324

DataFrame >> rows: anArrayOfNames [
	"Answer the rows whose names are the elements of anArrayOfNames. The names are resolved to row indices and the result is what rowsAt: answers for them"

	^ self rowsAt:
		  (anArrayOfNames collect: [ :rowName | self indexOfRowNamed: rowName ])
]

5✔

2334

5✔

2337

DataFrame >> rows: anArrayOfRowNames put: anArrayOfArrays [
	"Replace each row named in anArrayOfRowNames with the array found at the same position in anArrayOfArrays. Signal SizeMismatch when both collections do not have the same size"

	| sameSize |
	sameSize := anArrayOfArrays size = anArrayOfRowNames size.
	sameSize ifFalse: [ SizeMismatch signal ].

	anArrayOfRowNames
		with: anArrayOfArrays
		do: [ :rowName :newValues | self row: rowName put: newValues ]
]

5✔

2345

5✔

2348

DataFrame >> rowsAt: anArrayOfNumbers [
	"Answer a new DataFrame made of the rows whose indices are the elements of anArrayOfNumbers. The selected rows keep their row names and the result has the column names of the receiver"

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame rowsAt: #(1 3)) >>> (#(#(1 2) #(5 6)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2) #(r3c1 r3c2)) asDataFrame rowsAt: #(1 3)) >>> (#(#(r1c1 r1c2) #(r3c1 r3c2)) asDataFrame)"

	| selectedNames selectedContents |
	selectedNames := anArrayOfNumbers collect: [ :index | self rowNames at: index ].
	selectedContents := self contents rowsAt: anArrayOfNumbers.

	^ DataFrame
		  withDataFrameInternal: selectedContents
		  rowNames: selectedNames
		  columnNames: self columnNames
]

5✔

2362

5✔

2365

DataFrame >> rowsAt: anArrayOfNumbers put: anArrayOfArrays [
	"Overwrite the rows at the indices listed in anArrayOfNumbers with the arrays found at the same positions in anArrayOfArrays (each pair is stored through rowAt:put:). Signals SizeMismatch when the two arguments do not have the same size."

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame rowsAt: #(1 3) put: #((10 20)(50 60))) >>> (#(#(10 20) #(3 4) #(50 60)) asDataFrame)"

	anArrayOfArrays size ~= anArrayOfNumbers size ifTrue: [
		SizeMismatch signal ].

	anArrayOfNumbers with: anArrayOfArrays do: [ :rowIndex :values |
		self rowAt: rowIndex put: values ]
]

5✔

2376

5✔

2379

DataFrame >> rowsFrom: begin to: end [
	"Answer the rows whose indices lie in the interval begin..end (both ends included). The interval is handed to rowsAt:, which builds the answer."

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame rowsFrom: 1 to: 2) >>> (#(#(1 2) #(3 4)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2) #(r3c1 r3c2)) asDataFrame rowsFrom: 1 to: 2) >>> (#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame)"

	| rowIndices |
	rowIndices := begin to: end.
	^ self rowsAt: rowIndices
]

5✔

2387

5✔

2390

DataFrame >> rowsFrom: firstNumber to: secondNumber put: anArrayOfArrays [
	"Overwrite the rows between firstNumber and secondNumber (both included) with the arrays of anArrayOfArrays. The first array always goes to row firstNumber; when firstNumber is greater than secondNumber the rows are therefore filled in descending index order. Signals SizeMismatch when anArrayOfArrays does not hold exactly one array per row of the range."

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame rowsFrom: 1 to: 2 put: #(#(7 8) #(9 10))) >>> (#(#(7 8) #(9 10) #(5 6)) asDataFrame)"

	| expectedSize rowIndices |
	expectedSize := (firstNumber - secondNumber) abs + 1.
	anArrayOfArrays size = expectedSize ifFalse: [ SizeMismatch signal ].

	rowIndices := secondNumber >= firstNumber
		              ifFalse: [ (secondNumber to: firstNumber) reversed ]
		              ifTrue: [ firstNumber to: secondNumber ].

	rowIndices withIndexDo: [ :rowIndex :position |
		self rowAt: rowIndex put: (anArrayOfArrays at: position) ]
]

5✔

2405

5✔

2408

DataFrame >> select: aBlock [
	"Evaluate aBlock once per row index, passing the element answered by at: for that index, and answer (through rowsAt:) the rows for which aBlock answered true. The receiver is not modified by this method itself."

	^ self rowsAt: ((1 to: self numberOfRows) select: [ :rowIndex |
			   aBlock value: (self at: rowIndex) ])
]

5✔

2419

5✔

2422

DataFrame >> setDefaultRowColumnNames [
	"Name rows and columns after their positions: row names become 1..numberOfRows (installed through privateRowNames:) and column names become 1..numberOfColumns."

	self
		privateRowNames: (1 to: self numberOfRows);
		columnNames: (1 to: self numberOfColumns)
]

5✔

2426

5✔

2429

DataFrame >> shuffleBy: aNumber [
	"Reorder the rows of the receiver in place, moving every row name together with its row. Despite its name, aNumber is forwarded unchanged to shuffleBy: on an Array of row indices, i.e. it plays the role of the random generator (shuffleWithSeed: passes a seeded Random)."

	| rowCount source target targetNames order |
	rowCount := self numberOfRows.
	source := contents asArray2D.
	target := source class rows: rowCount columns: self numberOfColumns.
	targetNames := rowNames class ofSize: rowCount.

	"order at: k is the index of the original row that ends up at position k."
	order := (1 to: rowCount) asArray shuffleBy: aNumber.
	1 to: order size do: [ :position |
		| originalIndex |
		originalIndex := order at: position.
		targetNames at: position put: (rowNames at: originalIndex).
		target atRow: position put: (source atRow: originalIndex) ].

	contents := DataFrameInternal fromArray2D: target.
	rowNames := targetNames
]

5✔

2444

5✔

2447

DataFrame >> shuffleWithSeed: aNumber [
	"Shuffle the rows of the receiver with a Random generator seeded with aNumber, so that the same seed can be reused to reproduce a shuffle. Answers whatever shuffleBy: answers."

	| generator |
	generator := Random new seed: aNumber.
	^ self shuffleBy: generator
]

5✔

2451

5✔

2460

DataFrame >> size [
	"The size of a DataFrame is its number of rows; columns are not counted."

	"(#(#(1 2) #(3 4) #(5 6)) asDataFrame size) >>> 3 "

	"(#(#(1 2 3) #(4 5 6)) asDataFrame size) >>> 2 "

	"(#() asDataFrame size) >>> 0 "

	| rowCount |
	rowCount := self numberOfRows.
	^ rowCount
]

5✔

2470

5✔

2473

DataFrame >> sortBy: columnName [
	"Reorder the rows of the receiver so that the values of the column named columnName are in ascending order. This is sortBy:using: with a <= comparison."

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortBy: 1) >>> (#(#(1 4) #(2 4) #(3 2)) asDataFrame)"

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortBy: 2) >>> (#(#(3 2) #(1 4) #(2 4)) asDataFrame)"

	| ascending |
	ascending := [ :left :right | left <= right ].
	self sortBy: columnName using: ascending
]

5✔

2481

5✔

2484

DataFrame >> sortBy: columnName using: aBlock [
	"Reorder the rows of the receiver according to the two-argument comparison aBlock applied to the values of the column named columnName. A copy of that column is sorted; its keys (the row names) then give the new row order, and the rows are fetched by name into fresh contents."

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortBy: 1 using: [ :a :b | a <= b ]) >>> (#(#(1 4) #(2 4) #(3 2)) asDataFrame)"

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortBy: 2 using: [ :a :b | a <= b ]) >>> (#(#(3 2) #(1 4) #(2 4)) asDataFrame)"

	| sortedColumn orderedNames reordered |
	"Sort a copy so that the column held by the receiver is left alone."
	sortedColumn := (self column: columnName) copy.
	sortedColumn sort: aBlock.
	orderedNames := sortedColumn keys.

	reordered := DataFrameInternal new: self dimensions.
	1 to: orderedNames size do: [ :position |
		reordered
			rowAt: position
			put: (self row: (orderedNames at: position)) asArray ].

	contents := reordered.
	self rowNames: orderedNames
]

5✔

2504

5✔

2507

DataFrame >> sortByAll: arrayOfColumnNames [
	"Chain-sort the receiver in ascending order and answer it. The first name in arrayOfColumnNames is the primary key, the second one breaks ties of the first, and so on. This is obtained by sorting on each column in turn, starting from the last name."

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortByAll: #(1 2)) >>> (#(#(1 4) #(2 4) #(3 2)) asDataFrame)"

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortByAll: #(2 1)) >>> (#(#(3 2) #(1 4) #(2 4)) asDataFrame)"

	"sortBy: is the ascending ( <= ) form of sortBy:using:"
	arrayOfColumnNames reverseDo: [ :columnName | self sortBy: columnName ].
	^ self
]

5✔

2517

5✔

2520

DataFrame >> sortByRowNames [
	"Reorder the rows of the receiver so that the row names are in ascending order. This is sortByRowNamesUsing: with a <= comparison."

	| ascending |
	ascending := [ :left :right | left <= right ].
	self sortByRowNamesUsing: ascending
]

5✔

2524

5✔

2527

DataFrame >> sortByRowNamesUsing: aBlock [
	"Reorder the rows of the receiver according to the two-argument comparison aBlock applied to the row names. The rows are fetched by name, in sorted-name order, into fresh contents which then replace the old ones."

	| orderedNames reordered |
	orderedNames := self rowNames sorted: aBlock.
	reordered := DataFrameInternal new: self dimensions.

	1 to: orderedNames size do: [ :position |
		reordered
			rowAt: position
			put: (self row: (orderedNames at: position)) asArray ].

	contents := reordered.
	self rowNames: orderedNames
]

5✔

2540

5✔

2543

DataFrame >> sortDescendingBy: columnName [
	"Reorder the rows of the receiver so that the values of the column named columnName are in descending order. This is sortBy:using: with a >= comparison."

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortDescendingBy: 1) >>> (#(#(3 2) #(2 4) #(1 4)) asDataFrame)"

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortDescendingBy: 2) >>> (#(#(1 4) #(2 4) #(3 2)) asDataFrame)"

	| descending |
	descending := [ :left :right | left >= right ].
	self sortBy: columnName using: descending
]

5✔

2551

5✔

2554

DataFrame >> sortDescendingByAll: arrayOfColumnNames [
	"Chain-sort the receiver in descending order and answer it. The first name in arrayOfColumnNames is the primary key, the second one breaks ties of the first, and so on. This is obtained by sorting on each column in turn, starting from the last name."

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortDescendingByAll: #(1 2)) >>> (#(#(3 2) #(2 4) #(1 4)) asDataFrame)"

	"(#(#(3 2) #(1 4) #(2 4)) asDataFrame sortDescendingByAll: #(2 1)) >>> (#(#(2 4) #(1 4) #(3 2)) asDataFrame)"

	"sortDescendingBy: is the descending ( >= ) form of sortBy:using:"
	arrayOfColumnNames reverseDo: [ :columnName |
		self sortDescendingBy: columnName ].
	^ self
]

5✔

2564

5✔

2567

DataFrame >> sortDescendingByRowNames [
	"Reorder the rows of the receiver so that the row names are in descending order. This is sortByRowNamesUsing: with a >= comparison."

	| descending |
	descending := [ :left :right | left >= right ].
	self sortByRowNamesUsing: descending
]

5✔

2571

5✔

2574

DataFrame >> stdev [
	"Answer the standard deviation of every column, i.e. how dispersed the values of the column are around their average. The #stdev selector is applied to each column through applyToAllColumns:."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame stdev) >>> (Dictionary newFrom: {(1 -> 10).(2 -> 1)})"

	| statistic |
	statistic := #stdev.
	^ self applyToAllColumns: statistic
]

5✔

2580

5✔

2596

DataFrame >> tail [
	"Answer the last rows of the receiver; how many is given by defaultHeadTailSize. See tail: for an explicit count."

	| rowCount |
	rowCount := self defaultHeadTailSize.
	^ self tail: rowCount
]

5✔

2600

5✔

2603

DataFrame >> tail: aNumber [
	"Answer the last aNumber rows of the receiver. When aNumber exceeds the number of rows, all rows are answered."

	| total taken firstIndex |
	total := self numberOfRows.
	"Never take more rows than there are."
	taken := total min: aNumber.
	firstIndex := total - taken + 1.

	^ self rowsAt: (firstIndex to: total)
]

5✔

2609

5✔

2612

DataFrame >> thirdQuartile [
	"Answer the third quartile of every column, i.e. the value that 75% of the values of the column do not exceed. The #thirdQuartile selector is applied to each column through applyToAllColumns:."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame thirdQuartile) >>> (Dictionary newFrom: {(1 -> 30).(2 -> 3)})"

	| statistic |
	statistic := #thirdQuartile.
	^ self applyToAllColumns: statistic
]

5✔

2618

5✔

2621

DataFrame >> toColumn: columnName applyElementwise: aBlock [
	"Replace every value of the column named columnName by the result of evaluating aBlock with that value. The receiver is modified in place."

	| transformed |
	transformed := (self column: columnName) asArray collect: [ :value |
		               aBlock value: value ].
	self column: columnName put: transformed asArray
]

5✔

2628

5✔

2631

DataFrame >> toColumnAt: columnNumber applyElementwise: aBlock [
	"Replace every value of the column at index columnNumber by the result of evaluating aBlock with that value. The index is translated to the column name and the work is delegated to toColumn:applyElementwise:."

	"(#(#(1 2) #(3 4)) asDataFrame toColumnAt: 1 applyElementwise:[ :x | x - 1 ]) >>> (#(#(0 2) #(2 4)) asDataFrame)"

	^ self
		  toColumn: (self columnNames at: columnNumber)
		  applyElementwise: aBlock
]

5✔

2639

5✔

2642

DataFrame >> toColumns: arrayOfColumnNames applyElementwise: aBlock [
	"Apply aBlock to every value of each column named in arrayOfColumnNames, one column after the other, through toColumn:applyElementwise:."

	arrayOfColumnNames do: [ :columnName |
		self toColumn: columnName applyElementwise: aBlock ]
]

5✔

2647

5✔

2650

DataFrame >> toColumnsAt: arrayOfColumnNumbers applyElementwise: aBlock [
	"Apply aBlock to every value of each column whose index is listed in arrayOfColumnNumbers, one column after the other, through toColumnAt:applyElementwise:."

	"(#(#(1 2) #(3 4)) asDataFrame toColumnsAt: #(1 2) applyElementwise:[ :x | x - 1 ]) >>> (#(#(0 1) #(2 3)) asDataFrame)"

	arrayOfColumnNumbers do: [ :columnNumber |
		self toColumnAt: columnNumber applyElementwise: aBlock ]
]

5✔

2657

5✔

2660

DataFrame >> toHtml [
	"Answer a String with the receiver rendered as an HTML table. The rendering works on a copy of the receiver to which the row names are added as a first column named '#'. Header cells hold the column names (asString), body cells hold the printString of each value; both are right-padded with padRightTo: to the widest entry of their column. The first cell of each body row is written as <th>, the others as <td>.
	NOTE(review): names and values are written verbatim, without HTML escaping - confirm callers only render trusted data."

	| html columnWidths dataFrame allRows |
	dataFrame := self copy.
	dataFrame addColumn: dataFrame rowNames named: '#' atPosition: 1.

	"Build the row collection once; it is the same for every column measured below."
	allRows := dataFrame rows.
	columnWidths := dataFrame columnNames collect: [ :columnName |
		                allRows
			                inject: columnName asString size
			                into: [ :widest :row |
			                widest max: (row at: columnName) printString size ] ].

	html := WriteStream on: String new.
	html
		nextPutAll: '<table border="1" class="dataframe">';
		cr;
		nextPutAll: '  <thead>';
		cr;
		nextPutAll: '    <tr style="text-align: left;">'.

	dataFrame columnNames withIndexDo: [ :columnName :index |
		html
			nextPutAll: '      <th>';
			nextPutAll:
				(columnName asString padRightTo: (columnWidths at: index));
			nextPutAll: '</th>';
			cr ].

	html
		nextPutAll: '    </tr>';
		cr;
		nextPutAll: '  </thead>';
		cr;
		nextPutAll: '  <tbody>';
		cr.

	dataFrame asArrayOfRows do: [ :row |
		html nextPutAll: '    <tr>'.

		row withIndexDo: [ :value :index |
			| isRowName |
			"The first cell is the row name and is rendered as a header cell."
			isRowName := index = 1.
			html
				nextPutAll: (isRowName
						 ifTrue: [ '      <th>' ]
						 ifFalse: [ '      <td>' ]);
				nextPutAll:
					(value printString padRightTo: (columnWidths at: index));
				nextPutAll: (isRowName
						 ifTrue: [ '</th>' ]
						 ifFalse: [ '</td>' ]);
				cr ].

		html
			nextPutAll: '    </tr>';
			cr ].

	html
		nextPutAll: '  </tbody>';
		cr;
		nextPutAll: '</table>'.

	^ html contents
]

5✔

2731

5✔

2734

DataFrame >> toLatex [
	"Answer a String with the receiver rendered as a LaTeX tabular environment. The rendering works on a copy of the receiver to which the row names are added as a first column named '\#'. Every column is left-aligned ('l') with vertical rules, and an \hline follows the header and every row. Header cells hold the column names (asString), body cells hold the printString of each value; both are right-padded with padRightTo: to the widest entry of their column and separated by ' & '.
	NOTE(review): apart from the '\#' column name, names and values are written verbatim, without LaTeX escaping."

	| latex columnWidths dataFrame allRows columnCount |
	dataFrame := self copy.
	dataFrame addColumn: dataFrame rowNames named: '\#' atPosition: 1.
	columnCount := dataFrame numberOfColumns.

	"Build the row collection once; it is the same for every column measured below."
	allRows := dataFrame rows.
	columnWidths := dataFrame columnNames collect: [ :columnName |
		                allRows
			                inject: columnName asString size
			                into: [ :widest :row |
			                widest max: (row at: columnName) printString size ] ].

	latex := WriteStream on: String new.
	latex nextPutAll: '\begin{tabular}{|'.
	columnCount timesRepeat: [ latex nextPutAll: 'l|' ].
	latex
		nextPutAll: '}';
		cr;
		nextPutAll: '\hline';
		cr.

	dataFrame columnNames withIndexDo: [ :columnName :index |
		latex nextPutAll:
			(columnName asString padRightTo: (columnWidths at: index)).
		"No separator after the last cell of the line."
		index = columnCount ifFalse: [ latex nextPutAll: ' & ' ] ].
	latex
		nextPutAll: '\\';
		cr;
		nextPutAll: '\hline';
		cr.

	dataFrame asArrayOfRows do: [ :row |
		row withIndexDo: [ :value :index |
			latex nextPutAll:
				(value printString padRightTo: (columnWidths at: index)).
			index = columnCount ifFalse: [ latex nextPutAll: ' & ' ] ].
		latex
			nextPutAll: '\\';
			cr;
			nextPutAll: '\hline';
			cr ].

	latex nextPutAll: '\end{tabular}'.
	^ latex contents
]

5✔

2784

5✔

2787

DataFrame >> toMarkdown [
	"Answer a String with the receiver rendered as a Markdown table. The rendering works on a copy of the receiver to which the row names are added as a first column named '#'. Every line starts with '| ' and every cell is followed by ' | '. The second line is the separator row, made of one run of dashes per column. Header cells hold the column names (asString), body cells hold the printString of each value; both are right-padded with padRightTo: to the widest entry of their column."

	| markdown columnWidths dataFrame allRows |
	dataFrame := self copy.
	dataFrame addColumn: dataFrame rowNames named: '#' atPosition: 1.

	"Build the row collection once; it is the same for every column measured below."
	allRows := dataFrame rows.
	columnWidths := dataFrame columnNames collect: [ :columnName |
		                allRows
			                inject: columnName asString size
			                into: [ :widest :row |
			                widest max: (row at: columnName) printString size ] ].

	markdown := WriteStream on: String new.
	markdown nextPutAll: '| '.
	dataFrame columnNames withIndexDo: [ :columnName :index |
		markdown
			nextPutAll:
				(columnName asString padRightTo: (columnWidths at: index));
			nextPutAll: ' | ' ].
	markdown cr.

	markdown nextPutAll: '| '.
	columnWidths do: [ :width |
		"As many dashes as the column is wide, and never fewer than one."
		markdown
			nextPutAll: (String new: (width max: 1) withAll: $-);
			nextPutAll: ' | ' ].
	markdown cr.

	dataFrame asArrayOfRows do: [ :row |
		markdown nextPutAll: '| '.
		row withIndexDo: [ :value :index |
			markdown
				nextPutAll:
					(value printString padRightTo: (columnWidths at: index));
				nextPutAll: ' | ' ].
		markdown cr ].

	^ markdown contents
]

5✔

2830

5✔

2833

DataFrame >> toString [
	"Answer a String with the receiver rendered as a plain-text table, one line per row preceded by a header line. The rendering works on a copy of the receiver to which the row names are added as a first column named '#'. Header cells hold the column names (asString), body cells hold the printString of each value; both are right-padded with padRightTo: to the widest entry of their column and followed by two spaces."

	| stringTable columnWidths dataFrame allRows |
	dataFrame := self copy.
	dataFrame addColumn: dataFrame rowNames named: '#' atPosition: 1.

	"Build the row collection once; it is the same for every column measured below."
	allRows := dataFrame rows.
	columnWidths := dataFrame columnNames collect: [ :columnName |
		                allRows
			                inject: columnName asString size
			                into: [ :widest :row |
			                widest max: (row at: columnName) printString size ] ].

	stringTable := WriteStream on: String new.
	dataFrame columnNames withIndexDo: [ :columnName :index |
		stringTable
			nextPutAll:
				(columnName asString padRightTo: (columnWidths at: index));
			nextPutAll: '  ' ].
	stringTable cr.

	dataFrame asArrayOfRows do: [ :row |
		row withIndexDo: [ :value :index |
			stringTable
				nextPutAll:
					(value printString padRightTo: (columnWidths at: index));
				nextPutAll: '  ' ].
		stringTable cr ].

	^ stringTable contents
]

5✔

2867

5✔

2870

DataFrame >> transposed [
	"Answer a new DataFrame whose rows are the columns of the receiver. The column names of the receiver become the row names of the answer and vice versa. The receiver is not modified."

	"(#(#(1 2) #(3 4)) asDataFrame transposed) >>> (#(#(1 3) #(2 4)) asDataFrame)"

	"(#(#(1 2 3)) asDataFrame transposed) >>> (#(#(1) #(2) #(3)) asDataFrame)"

	"(#(#(r1c1 r1c2) #(r2c1 r2c2)) asDataFrame transposed) >>> (#(#(r1c1 r2c1) #(r1c2 r2c2)) asDataFrame)"

	^ (DataFrame withRows: self asArrayOfColumns)
		  rowNames: self columnNames;
		  columnNames: self rowNames;
		  yourself
]

5✔

2884

5✔

2887

DataFrame >> variance [
	"Answer the variance of every column. Variance measures how far the values of a set are from the average of the set; it is the square of the standard deviation. The #variance selector is applied to each column through applyToAllColumns:."

	"(#(#(10 3) #(20 1) #(30 2)) asDataFrame variance) >>> (Dictionary newFrom: {(1 -> 100).(2 -> 1)})"

	| statistic |
	statistic := #variance.
	^ self applyToAllColumns: statistic
]

5✔

2893

5✔

2896

DataFrame >> withIndexCollect: elementAndIndexBlock [
	"Answer a new DataFrame (of the receiver's class) holding, for every row of the receiver, the result of evaluating elementAndIndexBlock with a copy of the row and its index. The number of columns of the answer is the size of the result obtained for the first row, and its column names are the keys of that first row.
	An empty receiver answers an empty DataFrame with the receiver's column names and elementAndIndexBlock is never evaluated.
	Note that elementAndIndexBlock is evaluated twice for the first row: once to size the answer and once while collecting."

	| firstRow newDataFrame |
	"Without a first row there is nothing to size the answer from."
	self numberOfRows = 0 ifTrue: [
		^ self class withColumnNames: self columnNames ].

	firstRow := (self rowAt: 1) copy.
	newDataFrame := self class new: 0@(elementAndIndexBlock value: firstRow value: 1) size.
	newDataFrame columnNames: firstRow keys.

	self withIndexDo: [ :each :index | newDataFrame add: (elementAndIndexBlock value: each copy value: index)].
	^ newDataFrame
]

5✔

2906

5✔

2909

DataFrame >> withIndexDo: elementAndIndexBlock [
	"Evaluate elementAndIndexBlock for every row of the receiver, in index order, with the row and its index as arguments. After each evaluation the row is stored back into the receiver, so changes made to the row inside the block are kept."

	(1 to: self size) do: [ :rowIndex |
		| currentRow |
		currentRow := self rowAt: rowIndex.
		elementAndIndexBlock value: currentRow value: rowIndex.

		"A hack to allow modification of rows inside do block"
		self rowAt: rowIndex put: currentRow asArray ]
]

5✔

2918

5✔

2921

DataFrame >> withIndexReject: elementAndIndexBlock [
	"Evaluate elementAndIndexBlock with every element of the receiver and its index, and answer the rows for which it answered false. This is withIndexSelect: with the negated block."

	| negated |
	negated := [ :element :position |
	           (elementAndIndexBlock value: element value: position) not ].
	^ self withIndexSelect: negated
]

5✔

2926

5✔

2929

DataFrame >> withIndexSelect: aBlock [
	"Evaluate aBlock once per row index, passing the element answered by at: for that index together with the index itself, and answer (through rowsAt:) the rows for which aBlock answered true."

	^ self rowsAt: ((1 to: self numberOfRows) select: [ :rowIndex |
			   aBlock value: (self at: rowIndex) value: rowIndex ])
]

5✔

2940

5✔

PolyMathOrg / DataFrame / 13409391746

Source File
Press 'n' to go to next uncovered line, 'b' for previous

PolyMathOrg / DataFrame / 13409391746

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous