Old fashioned CSV parse error

Hi

I have a very conventional CSV file: fields are enclosed in double quotes and separated by commas. Some fields have commas in them, so the straightforward comma delimiter isn’t working. I’m using the Community edition with the UniversalDataReader. What’s the best way to specify the delimiters when defining metadata?

Here’s a sample record from the csv file.

"2","70698","1","21","Downfield Road","Hertford Heath","Hertfordshire","SG13 7RX","SG13 7RX","265000","Reduced to","House - terraced","0","2","2","Two bedroom mid-terrace property with two reception rooms, en-suite to master bedroom and separate downstairs shower room. Close to village green in Hertford Heath.","01/Nov/2010 16:14","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","","14","2","0","A","1","1","1","0","20690700","53","66","53","67","3","","","","0","0","0","0","","Downfield Road, Hertford Heath, Hertfordshire","2","0","0","http://www.vebra.com/details/property/22815/20690700",""

Thanks

Karen

Hello Karen,
Have you set quotedStrings to true in the Data Reader? With that option enabled it should be possible to parse any CSV-compliant data, including quoted fields that contain commas.

Thanks for that. I have specified that in the Data Reader, but when editing the metadata table, the additional commas in the text of the fields (the description, for example) create additional fields, so in the end there are too many fields when I try to extract the records. I’m sure there’s a simple solution to this staring me in the face.

Thanks.

Hi Karen,

Quoted-string parsing works well in UniversalDataReader. I tried to reproduce your problem but could not; even editing the metadata works fine. See my graph:

<?xml version="1.0" encoding="UTF-8"?>
<!--
  Example graph "quoted_string": a UniversalDataReader with quotedStrings="true"
  reads a comma-delimited file and passes every record to a Trash component.
  The single edge between them carries the 76-field metadata declared below.
-->
<Graph name="quoted_string"
       id="1289484443448"
       author="lucie"
       created="Thu Nov 11 14:58:22 CET 2010"
       modified="Thu Nov 11 16:27:51 CET 2010"
       modifiedBy="lucie"
       revision="1.21"
       guiVersion="3.0.0"
       licenseCode="community"
       licenseType="Community">
  <Global>
    <Metadata id="Metadata0" previewAttachmentCharset="ISO-8859-1">
      <!-- Delimited record: fields are split on "," and records end with CRLF. -->
      <Record name="recordName1"
              type="delimited"
              fieldDelimiter=","
              recordDelimiter="\r\n"
              previewAttachmentCharset="ISO-8859-1">
        <!-- Fields 1 to 4: integers. -->
        <Field name="field1" type="integer"/>
        <Field name="field2" type="integer"/>
        <Field name="field3" type="integer">
          <attr name="description"><![CDATA[sample description]]></attr>
        </Field>
        <Field name="field4" type="integer"/>
        <!-- Fields 5 to 12: strings. -->
        <Field name="field5" type="string"/>
        <Field name="field6" type="string"/>
        <Field name="field7" type="string"/>
        <Field name="field8" type="string"/>
        <Field name="field9" type="string"/>
        <Field name="field10" type="string"/>
        <Field name="field11" type="string"/>
        <Field name="field12" type="string"/>
        <!-- Fields 13 to 15: integers. -->
        <Field name="field13" type="integer"/>
        <Field name="field14" type="integer"/>
        <Field name="field15" type="integer"/>
        <Field name="field16" type="string"/>
        <!-- Date with an abbreviated month name, parsed using the "en" locale. -->
        <Field name="field17" type="date" format="dd/MMM/yyyy HH:mm" locale="en"/>
        <!-- Fields 18 to 48: strings. -->
        <Field name="field18" type="string"/>
        <Field name="field19" type="string"/>
        <Field name="field20" type="string"/>
        <Field name="field21" type="string"/>
        <Field name="field22" type="string"/>
        <Field name="field23" type="string"/>
        <Field name="field24" type="string"/>
        <Field name="field25" type="string"/>
        <Field name="field26" type="string"/>
        <Field name="field27" type="string"/>
        <Field name="field28" type="string"/>
        <Field name="field29" type="string"/>
        <Field name="field30" type="string"/>
        <Field name="field31" type="string"/>
        <Field name="field32" type="string"/>
        <Field name="field33" type="string"/>
        <Field name="field34" type="string"/>
        <Field name="field35" type="string"/>
        <Field name="field36" type="string"/>
        <Field name="field37" type="string"/>
        <Field name="field38" type="string"/>
        <Field name="field39" type="string"/>
        <Field name="field40" type="string"/>
        <Field name="field41" type="string"/>
        <Field name="field42" type="string"/>
        <Field name="field43" type="string"/>
        <Field name="field44" type="string"/>
        <Field name="field45" type="string"/>
        <Field name="field46" type="string">
          <attr name="description"><![CDATA[describtion community]]></attr>
        </Field>
        <Field name="field47" type="string"/>
        <Field name="field48" type="string"/>
        <!-- Fields 49 to 51: integers. -->
        <Field name="field49" type="integer"/>
        <Field name="field50" type="integer"/>
        <Field name="field51" type="integer"/>
        <Field name="field52" type="string"/>
        <!-- Fields 53 to 70: integers. -->
        <Field name="field53" type="integer"/>
        <Field name="field54" type="integer"/>
        <Field name="field55" type="integer">
          <attr name="description"><![CDATA[sample description]]></attr>
        </Field>
        <Field name="field56" type="integer"/>
        <Field name="field57" type="integer"/>
        <Field name="field58" type="integer"/>
        <Field name="field59" type="integer"/>
        <Field name="field60" type="integer"/>
        <Field name="field61" type="integer"/>
        <Field name="field62" type="integer"/>
        <Field name="field63" type="integer"/>
        <Field name="field64" type="integer"/>
        <Field name="field65" type="integer"/>
        <Field name="field66" type="integer"/>
        <Field name="field67" type="integer"/>
        <Field name="field68" type="integer"/>
        <Field name="field69" type="integer"/>
        <Field name="field70" type="integer"/>
        <!-- Fields 71 to 76: trailing string and integer columns. -->
        <Field name="field71" type="string"/>
        <Field name="field72" type="integer"/>
        <Field name="field73" type="integer"/>
        <Field name="field74" type="string"/>
        <Field name="field75" type="string"/>
        <Field name="field76" type="string"/>
      </Record>
    </Metadata>
    <!-- Graph parameters (such as DATAIN_DIR used below) are loaded from workspace.prm. -->
    <Property id="GraphParameter0" fileURL="workspace.prm"/>
    <Dictionary/>
  </Global>
  <Phase number="0">
    <!-- Reader: quotedStrings="true" is the setting under discussion for quoted fields. -->
    <Node id="DATA_READER0"
          type="DATA_READER"
          enabled="enabled"
          fileURL="${DATAIN_DIR}/quoted_string.in"
          quotedStrings="true"
          verbose="true"
          guiName="UniversalDataReader"
          guiX="74"
          guiY="106"
          guiWidth="0"
          guiHeight="0"/>
    <!-- Sink: Trash with debugPrint enabled. -->
    <Node id="TRASH0"
          type="TRASH"
          enabled="enabled"
          debugPrint="true"
          guiName="Trash"
          guiX="391"
          guiY="101"
          guiWidth="0"
          guiHeight="0"/>
    <!-- Connects reader port 0 to Trash port 0 using Metadata0. -->
    <Edge id="Edge0"
          fromNode="DATA_READER0:0"
          toNode="TRASH0:0"
          metadata="Metadata0"
          outPort="Port 0 (output)"
          inPort="Port 0 (in)"
          guiRouter="Manhattan"
          guiBendpoints=""/>
  </Phase>
</Graph>