• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

PolyMathOrg / DataFrame / 13409391746

19 Feb 2025 09:30AM UTC coverage: 94.756%. Remained the same
13409391746

push

github

web-flow
Enable Pharo 12 and 13 for the CI

13571 of 14322 relevant lines covered (94.76%)

4.74 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.23
/src/DataFrame-IO/DataFrameCsvReader.class.st
1
"
2
I read csv files and convert them into DataFrames.
3

4
I read text files containing multiple columns separated by a `separator` character. The first row will become the column names of the DataFrame. As I read the file I detect the type of data in the columns. I currently know how to detect and convert floating point numbers, integers, dates, and times. When I can't detect a consistent type for all rows of a column I leave the column type as a string.
5

6
My behaviour can be modified by changing the column separator with `separator:`, turning off column type conversion using `shouldInferTypes: false`, or setting the first column as the row names using `includeRowNames: true`.
7

8
Use the `readFrom:` message to read a stream and return a `DataFrame`:
9

10
    reader := DataFrameCsvReader new.
11
    reader readFrom: (FileLocator home / 'data.csv') 
12

13
Modify my behaviour before calling `readFrom:`. The following example reads from a tab-separated stream and turns off any type conversions so all columns will be strings:
14

15
    df := DataFrameCsvReader new separator: Character tab; 
16
            shouldInferTypes: false;
17
            readFrom: aStream
18

19
It is also possible to specify the types of the columns in the file by passing a dictionary to the `columnTypes:` message. Column type conversion only happens if `shouldInferTypes` is true. Refer to `DataFrameTypeDetector` for more information about the format of the dictionary to be given to `columnTypes:`.
20
 
21
Internal Representation and Key Implementation Points.
22

23
    Instance Variables
24
        includeRowNames:                Boolean
25
        separator:                Character
26
        shouldInferTypes:                Boolean
27

28

29
    Implementation Points
30
"
31
Class {
32
        #name : #DataFrameCsvReader,
33
        #superclass : #DataFrameReader,
34
        #instVars : [
35
                'separator',
36
                'includeRowNames',
37
                'shouldInferTypes',
38
                'rowNames',
39
                'rows',
40
                'columnNames'
41
        ],
42
        #category : #'DataFrame-IO-Core'
43
}
44

45
{ #category : #reading }
46
DataFrameCsvReader >> columnNames: aCollectionOfString [
5✔
47
        "Set the receiver's column names"
5✔
48
        
5✔
49
        columnNames := aCollectionOfString
5✔
50
]
5✔
51

52
{ #category : #reading }
53
DataFrameCsvReader >> createDataFrame [
5✔
54
        | df |
5✔
55
        df := DataFrame
5✔
56
                withRows: rows
5✔
57
                columnNames: columnNames.
5✔
58

5✔
59
        self includeRowNames ifTrue: [
5✔
60
                df privateRowNames: rowNames ].
5✔
61

5✔
62
        ^ df
5✔
63
]
5✔
64

65
{ #category : #accessing }
66
DataFrameCsvReader >> defaultIncludeRowNames [
5✔
67
        ^ false
5✔
68
]
5✔
69

70
{ #category : #accessing }
71
DataFrameCsvReader >> defaultSeparator [
5✔
72
        ^ $,
5✔
73
]
5✔
74

75
{ #category : #accessing }
76
DataFrameCsvReader >> defaultShouldInferTypes [
5✔
77
        ^ true
5✔
78
]
5✔
79

80
{ #category : #accessing }
81
DataFrameCsvReader >> includeRowNames [
5✔
82
        ^ includeRowNames
5✔
83
]
5✔
84

85
{ #category : #accessing }
86
DataFrameCsvReader >> includeRowNames: anObject [
5✔
87
        includeRowNames := anObject
5✔
88
]
5✔
89

90
{ #category : #initialization }
91
DataFrameCsvReader >> initialize [
5✔
92
        super initialize.
5✔
93
        separator := self defaultSeparator.
5✔
94
        includeRowNames := self defaultIncludeRowNames.
5✔
95
        shouldInferTypes := self defaultShouldInferTypes.
5✔
96
        columnTypes := Dictionary new
5✔
97
]
5✔
98

99
{ #category : #reading }
100
DataFrameCsvReader >> readColumnNamesWith: aReader [
5✔
101
        "Set the receiver's column names if they were not manually set"
5✔
102

5✔
103
        columnNames ifNil: [ columnNames := aReader readHeader ].
5✔
104

5✔
105
        self includeRowNames ifTrue: [
5✔
106
                columnNames := columnNames copyWithoutFirst ]
5✔
107
]
5✔
108

109
{ #category : #reading }
110
DataFrameCsvReader >> readFrom: aFileReference [
5✔
111
        "Read data frame from a CSV file"
5✔
112
        | df |
5✔
113
        df := self readFromInternal: aFileReference .
5✔
114
        shouldInferTypes ifTrue: [
5✔
115
                DataFrameTypeDetector new columnTypes: columnTypes; detectTypesAndConvert: df ].
5✔
116
        ^ df
5✔
117
]
5✔
118

119
{ #category : #reading }
120
DataFrameCsvReader >> readFromInternal: aFileReference [
        "Read a data frame from the CSV file designated by aFileReference and answer it.
        aFileReference must understand #readStream. The stream is parsed by a NeoCSVReader configured with my separator; column names and rows are collected into my instance variables, and the DataFrame is built from them. No type conversion is performed here.
        The reader is closed inside an ensure: block so that it is released even when reading the header or the rows signals an error."

        | reader |
        reader := NeoCSVReader on: aFileReference readStream.

        [
                reader separator: self separator.
                self readColumnNamesWith: reader.
                self readRowsWith: reader ] ensure: [ reader close ].

        "The data frame is built only after the reader has been closed, as before"
        ^ self createDataFrame
]
5✔
134

135
{ #category : #reading }
136
DataFrameCsvReader >> readFromString: aString [
5✔
137
        "Read data frame from aString"
5✔
138

5✔
139
        ^ self
5✔
140
                readFromString: aString
5✔
141
                withSeparator: self separator
5✔
142
]
5✔
143

144
{ #category : #reading }
145
DataFrameCsvReader >> readFromString: aString withSeparator: aSeparator [
5✔
146
        "Read data frame from aString"
5✔
147

5✔
148
        | reader df |
5✔
149

5✔
150
        reader := NeoCSVReader on: aString readStream.
5✔
151
        reader separator: aSeparator.
5✔
152

5✔
153
        self readColumnNamesWith: reader.
5✔
154
        self readRowsWith: reader.
5✔
155
        reader close.
5✔
156
        df := self createDataFrame.
5✔
157
        ^ df
5✔
158
]
5✔
159

160
{ #category : #reading }
161
DataFrameCsvReader >> readFromString: aCSVString withSeparator: aSeparator skip: nRows [
5✔
162
        "Read data frame from aCSVString skipping nRows from its header"
5✔
163

5✔
164
        | reader df |
5✔
165

5✔
166
        reader := NeoCSVReader on: aCSVString readStream.
5✔
167
        reader separator: aSeparator.
5✔
168
        nRows timesRepeat: [ reader skipHeader ].
5✔
169

5✔
170
        self readColumnNamesWith: reader.
5✔
171
        self readRowsWith: reader.
5✔
172
        reader close.
5✔
173
        df := self createDataFrame.
5✔
174
        ^ df
5✔
175
]
5✔
176

177
{ #category : #reading }
178
DataFrameCsvReader >> readFromString: aString withSeparator: aSeparator withHeader: hasHeader [
5✔
179
        "Read data frame from aString"
5✔
180

5✔
181
        | reader df |
5✔
182

5✔
183
        reader := NeoCSVReader on: aString readStream.
5✔
184
        reader separator: aSeparator.
5✔
185

5✔
186
        hasHeader
5✔
187
                ifTrue: [ self readColumnNamesWith: reader ]
5✔
188
                ifFalse: [ self setDefaultColumnsWith: reader ].
5✔
189
        self readRowsWith: reader.
5✔
190
        reader close.
5✔
191
        df := self createDataFrame.
5✔
192
        ^ df
5✔
193
]
5✔
194

195
{ #category : #reading }
196
DataFrameCsvReader >> readOnlyRowsWith: aReader [
5✔
197
        rows := OrderedCollection new.
5✔
198

5✔
199
        [ aReader atEnd ] whileFalse: [
5✔
200
                rows add: aReader next ]
5✔
201
]
5✔
202

203
{ #category : #reading }
204
DataFrameCsvReader >> readRowsAndRowNamesWith: aReader [
5✔
205
        | line |
5✔
206
        rowNames := OrderedCollection new.
5✔
207
        rows := OrderedCollection new.
5✔
208

5✔
209
        [ aReader atEnd ] whileFalse: [
5✔
210
                line := aReader next.
5✔
211
                rowNames add: line first.
5✔
212
                rows add: line copyWithoutFirst ]
5✔
213
]
5✔
214

215
{ #category : #reading }
216
DataFrameCsvReader >> readRowsWith: aReader [
5✔
217
        ^ self includeRowNames
5✔
218
                ifTrue: [ self readRowsAndRowNamesWith: aReader ]
5✔
219
                ifFalse: [ self readOnlyRowsWith: aReader ]
5✔
220
]
5✔
221

222
{ #category : #accessing }
223
DataFrameCsvReader >> separator [
5✔
224
        ^ separator
5✔
225
]
5✔
226

227
{ #category : #accessing }
228
DataFrameCsvReader >> separator: anObject [
5✔
229
        separator := anObject
5✔
230
]
5✔
231

232
{ #category : #reading }
233
DataFrameCsvReader >> setDefaultColumnsWith: reader [
5✔
234
        "Set numbered columns"
5✔
235

5✔
236
        columnNames := (1 to: reader next size) asArray.
5✔
237
        reader resetStream
5✔
238
]
5✔
239

240
{ #category : #accessing }
241
DataFrameCsvReader >> shouldInferTypes [
×
242
        ^ shouldInferTypes
×
243
]
×
244

245
{ #category : #accessing }
246
DataFrameCsvReader >> shouldInferTypes: anObject [
×
247
        shouldInferTypes := anObject
×
248
]
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc