@@ -14,6 +14,9 @@ module instead.
14
14
#include "structmember.h" // PyMemberDef
15
15
#include <stdbool.h>
16
16
17
+ #define NOT_SET ((Py_UCS4)-1)
18
+ #define EOL ((Py_UCS4)-2)
19
+
17
20
18
21
typedef struct {
19
22
PyObject * error_obj ; /* CSV exception */
@@ -153,9 +156,9 @@ get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
153
156
}
154
157
155
158
static PyObject *
156
- get_nullchar_as_None (Py_UCS4 c )
159
+ get_char_or_None (Py_UCS4 c )
157
160
{
158
- if (c == '\0' ) {
161
+ if (c == NOT_SET ) {
159
162
Py_RETURN_NONE ;
160
163
}
161
164
else
@@ -172,19 +175,19 @@ Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
172
175
static PyObject *
173
176
Dialect_get_delimiter (DialectObj * self , void * Py_UNUSED (ignored ))
174
177
{
175
- return get_nullchar_as_None (self -> delimiter );
178
+ return get_char_or_None (self -> delimiter );
176
179
}
177
180
178
181
static PyObject *
179
182
Dialect_get_escapechar (DialectObj * self , void * Py_UNUSED (ignored ))
180
183
{
181
- return get_nullchar_as_None (self -> escapechar );
184
+ return get_char_or_None (self -> escapechar );
182
185
}
183
186
184
187
static PyObject *
185
188
Dialect_get_quotechar (DialectObj * self , void * Py_UNUSED (ignored ))
186
189
{
187
- return get_nullchar_as_None (self -> quotechar );
190
+ return get_char_or_None (self -> quotechar );
188
191
}
189
192
190
193
static PyObject *
@@ -235,7 +238,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt
235
238
* target = dflt ;
236
239
}
237
240
else {
238
- * target = '\0' ;
241
+ * target = NOT_SET ;
239
242
if (src != Py_None ) {
240
243
if (!PyUnicode_Check (src )) {
241
244
PyErr_Format (PyExc_TypeError ,
@@ -254,7 +257,7 @@ _set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt
254
257
return -1 ;
255
258
}
256
259
/* PyUnicode_READY() is called in PyUnicode_GetLength() */
257
- else {
260
+ else if ( len > 0 ) {
258
261
* target = PyUnicode_READ_CHAR (src , 0 );
259
262
}
260
263
}
@@ -269,7 +272,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
269
272
* target = dflt ;
270
273
}
271
274
else {
272
- * target = '\0' ;
275
+ * target = NOT_SET ;
273
276
if (!PyUnicode_Check (src )) {
274
277
PyErr_Format (PyExc_TypeError ,
275
278
"\"%s\" must be string, not %.200s" , name ,
@@ -287,7 +290,7 @@ _set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
287
290
return -1 ;
288
291
}
289
292
/* PyUnicode_READY() is called in PyUnicode_GetLength() */
290
- else {
293
+ else if ( len > 0 ) {
291
294
* target = PyUnicode_READ_CHAR (src , 0 );
292
295
}
293
296
}
@@ -481,7 +484,7 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
481
484
goto err
482
485
DIASET (_set_char , "delimiter" , & self -> delimiter , delimiter , ',' );
483
486
DIASET (_set_bool , "doublequote" , & self -> doublequote , doublequote , true);
484
- DIASET (_set_char_or_none , "escapechar" , & self -> escapechar , escapechar , 0 );
487
+ DIASET (_set_char_or_none , "escapechar" , & self -> escapechar , escapechar , NOT_SET );
485
488
DIASET (_set_str , "lineterminator" , & self -> lineterminator , lineterminator , "\r\n" );
486
489
DIASET (_set_char_or_none , "quotechar" , & self -> quotechar , quotechar , '"' );
487
490
DIASET (_set_int , "quoting" , & self -> quoting , quoting , QUOTE_MINIMAL );
@@ -491,19 +494,19 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
491
494
/* validate options */
492
495
if (dialect_check_quoting (self -> quoting ))
493
496
goto err ;
494
- if (self -> delimiter == 0 ) {
497
+ if (self -> delimiter == NOT_SET ) {
495
498
PyErr_SetString (PyExc_TypeError ,
496
499
"\"delimiter\" must be a 1-character string" );
497
500
goto err ;
498
501
}
499
502
if (quotechar == Py_None && quoting == NULL )
500
503
self -> quoting = QUOTE_NONE ;
501
- if (self -> quoting != QUOTE_NONE && self -> quotechar == 0 ) {
504
+ if (self -> quoting != QUOTE_NONE && self -> quotechar == NOT_SET ) {
502
505
PyErr_SetString (PyExc_TypeError ,
503
506
"quotechar must be set if quoting enabled" );
504
507
goto err ;
505
508
}
506
- if (self -> lineterminator == 0 ) {
509
+ if (self -> lineterminator == NULL ) {
507
510
PyErr_SetString (PyExc_TypeError , "lineterminator must be set" );
508
511
goto err ;
509
512
}
@@ -670,7 +673,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
670
673
switch (self -> state ) {
671
674
case START_RECORD :
672
675
/* start of record */
673
- if (c == '\0' )
676
+ if (c == EOL )
674
677
/* empty line - return [] */
675
678
break ;
676
679
else if (c == '\n' || c == '\r' ) {
@@ -682,11 +685,11 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
682
685
/* fallthru */
683
686
case START_FIELD :
684
687
/* expecting field */
685
- if (c == '\n' || c == '\r' || c == '\0' ) {
688
+ if (c == '\n' || c == '\r' || c == EOL ) {
686
689
/* save empty field - return [fields] */
687
690
if (parse_save_field (self ) < 0 )
688
691
return -1 ;
689
- self -> state = (c == '\0' ? START_RECORD : EAT_CRNL );
692
+ self -> state = (c == EOL ? START_RECORD : EAT_CRNL );
690
693
}
691
694
else if (c == dialect -> quotechar &&
692
695
dialect -> quoting != QUOTE_NONE ) {
@@ -722,25 +725,25 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
722
725
self -> state = AFTER_ESCAPED_CRNL ;
723
726
break ;
724
727
}
725
- if (c == '\0' )
728
+ if (c == EOL )
726
729
c = '\n' ;
727
730
if (parse_add_char (self , module_state , c ) < 0 )
728
731
return -1 ;
729
732
self -> state = IN_FIELD ;
730
733
break ;
731
734
732
735
case AFTER_ESCAPED_CRNL :
733
- if (c == '\0' )
736
+ if (c == EOL )
734
737
break ;
735
738
/*fallthru*/
736
739
737
740
case IN_FIELD :
738
741
/* in unquoted field */
739
- if (c == '\n' || c == '\r' || c == '\0' ) {
742
+ if (c == '\n' || c == '\r' || c == EOL ) {
740
743
/* end of line - return [fields] */
741
744
if (parse_save_field (self ) < 0 )
742
745
return -1 ;
743
- self -> state = (c == '\0' ? START_RECORD : EAT_CRNL );
746
+ self -> state = (c == EOL ? START_RECORD : EAT_CRNL );
744
747
}
745
748
else if (c == dialect -> escapechar ) {
746
749
/* possible escaped character */
@@ -761,7 +764,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
761
764
762
765
case IN_QUOTED_FIELD :
763
766
/* in quoted field */
764
- if (c == '\0' )
767
+ if (c == EOL )
765
768
;
766
769
else if (c == dialect -> escapechar ) {
767
770
/* Possible escape character */
@@ -786,7 +789,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
786
789
break ;
787
790
788
791
case ESCAPE_IN_QUOTED_FIELD :
789
- if (c == '\0' )
792
+ if (c == EOL )
790
793
c = '\n' ;
791
794
if (parse_add_char (self , module_state , c ) < 0 )
792
795
return -1 ;
@@ -808,11 +811,11 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
808
811
return -1 ;
809
812
self -> state = START_FIELD ;
810
813
}
811
- else if (c == '\n' || c == '\r' || c == '\0' ) {
814
+ else if (c == '\n' || c == '\r' || c == EOL ) {
812
815
/* end of line - return [fields] */
813
816
if (parse_save_field (self ) < 0 )
814
817
return -1 ;
815
- self -> state = (c == '\0' ? START_RECORD : EAT_CRNL );
818
+ self -> state = (c == EOL ? START_RECORD : EAT_CRNL );
816
819
}
817
820
else if (!dialect -> strict ) {
818
821
if (parse_add_char (self , module_state , c ) < 0 )
@@ -831,7 +834,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
831
834
case EAT_CRNL :
832
835
if (c == '\n' || c == '\r' )
833
836
;
834
- else if (c == '\0' )
837
+ else if (c == EOL )
835
838
self -> state = START_RECORD ;
836
839
else {
837
840
PyErr_Format (module_state -> error_obj ,
@@ -909,20 +912,14 @@ Reader_iternext(ReaderObj *self)
909
912
linelen = PyUnicode_GET_LENGTH (lineobj );
910
913
while (linelen -- ) {
911
914
c = PyUnicode_READ (kind , data , pos );
912
- if (c == '\0' ) {
913
- Py_DECREF (lineobj );
914
- PyErr_Format (module_state -> error_obj ,
915
- "line contains NUL" );
916
- goto err ;
917
- }
918
915
if (parse_process_char (self , module_state , c ) < 0 ) {
919
916
Py_DECREF (lineobj );
920
917
goto err ;
921
918
}
922
919
pos ++ ;
923
920
}
924
921
Py_DECREF (lineobj );
925
- if (parse_process_char (self , module_state , 0 ) < 0 )
922
+ if (parse_process_char (self , module_state , EOL ) < 0 )
926
923
goto err ;
927
924
} while (self -> state != START_RECORD );
928
925
@@ -1127,7 +1124,7 @@ join_append_data(WriterObj *self, unsigned int field_kind, const void *field_dat
1127
1124
* quoted = 1 ;
1128
1125
}
1129
1126
if (want_escape ) {
1130
- if (! dialect -> escapechar ) {
1127
+ if (dialect -> escapechar == NOT_SET ) {
1131
1128
PyErr_Format (self -> error_obj ,
1132
1129
"need to escape, but no escapechar set" );
1133
1130
return -1 ;
0 commit comments