-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathCsvPlugin.java
114 lines (104 loc) · 4.46 KB
/
CsvPlugin.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*******************************************************************************
* Copyright (c) 2018 IBM Corp.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package plugins.custom;
import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import com.ibm.es.ama.zing.common.AmaIngestionException;
import com.ibm.es.ama.zing.common.model.axml.AXMLBase.Publisher;
import com.ibm.es.ama.zing.common.model.axml.Content;
import com.ibm.es.ama.zing.common.model.axml.CrawlData;
import com.ibm.es.ama.zing.common.model.axml.CrawlData.Builder;
import com.ibm.es.ama.zing.common.model.converter.ConverterBase;
import com.ibm.es.ama.zing.common.model.converter.CustomConverterPlugin;
import com.ibm.es.ama.zing.common.model.axml.Document;
/**
 * Custom converter plugin that splits the CSV text of a document into one
 * published document per line.
 *
 * <p>Each comma-separated field of a line becomes a content entry named
 * {@code "@0"}, {@code "@1"}, ... in the row document. Every content entry of
 * the source document other than {@code "body"}, plus every content entry of
 * the supplied metadata documents, is appended to each row document.
 *
 * <p>Limitation: lines are split on every comma. Quoted fields, escaped commas
 * and fields spanning several lines are not interpreted.
 */
public class CsvPlugin implements CustomConverterPlugin {

    /** Name of the content entry whose value is read as CSV text. */
    private static final String BODY_CONTENT_NAME = "body";

    /** Separator between the fields of one CSV line. */
    private static final String COLUMN_SEPARATOR = ",";

    /**
     * Decides whether this plugin handles the given crawl data.
     *
     * @param data the crawl data to inspect
     * @return the result of the interface default when general settings are
     *         present; otherwise {@code true} only if the content type is
     *         {@code "text/csv"}
     */
    @Override
    public boolean canProcess(CrawlData data) {
        if (getGeneralSettings() != null) {
            return CustomConverterPlugin.super.canProcess(data);
        }
        return "text/csv".equals(data.getContentType());
    }

    /**
     * Decides whether metadata can be read from the given crawl data.
     *
     * @param data the crawl data to inspect
     * @return the result of the interface default when general settings are
     *         present; otherwise {@code true} only if the content type is
     *         {@code "application/metadata"}
     */
    @Override
    public boolean canReadMetadataFrom(CrawlData data) {
        if (getGeneralSettings() != null) {
            return CustomConverterPlugin.super.canReadMetadataFrom(data);
        }
        return "application/metadata".equals(data.getContentType());
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean removesMetadata() {
        return true;
    }

    /**
     * Adjusts the crawl data builder after conversion.
     *
     * <p>Delegates to the interface default when general settings are present;
     * otherwise sets the content type to {@code "application/axml"}.
     *
     * @param data    the crawl data that was processed
     * @param builder the builder for the resulting crawl data
     */
    @Override
    public void postProcess(CrawlData data, Builder builder) {
        if (getGeneralSettings() != null) {
            CustomConverterPlugin.super.postProcess(data, builder);
        } else {
            builder.setContentType("application/axml");
        }
    }

    /**
     * @return always {@code true}
     */
    @Override
    public boolean forcesDeletion() {
        return true;
    }

    /**
     * Publishes one document per CSV line found in the {@code "body"} content
     * of {@code document}.
     *
     * <p>Each row document gets the group ID and URL of the source document
     * with {@code "?row=N"} appended, where {@code N} starts at 0 for every
     * {@code "body"} content entry.
     *
     * <p>An {@link IOException} while reading is reported on standard error
     * and stops processing of that content entry; rows published before the
     * failure stay published.
     *
     * @param document          the source document holding the CSV text
     * @param documentPublisher receives one document per CSV line
     * @param metaDocumentList  documents whose content entries are copied into
     *                          every row document
     * @return {@code MODIFIED} if at least one row document was published,
     *         otherwise {@code NO_OUTPUT}
     * @throws AmaIngestionException declared by the interface; not thrown
     *                               directly by this implementation
     */
    @Override
    public ConverterStatus convert(Document document, Publisher<? super Document> documentPublisher,
            Iterable<Document> metaDocumentList) throws AmaIngestionException {
        ConverterStatus status = ConverterBase.ConverterStatus.NO_OUTPUT;

        // Gather everything that has to be repeated on every row document.
        ArrayList<Content> metaContents = new ArrayList<>();
        for (Content content : document) {
            // Constant-first comparison so an unnamed content entry is treated
            // as metadata instead of raising a NullPointerException.
            if (!BODY_CONTENT_NAME.equals(content.getName())) {
                metaContents.add(content);
            }
        }
        for (Document metaDocument : metaDocumentList) {
            for (Content content : metaDocument) {
                metaContents.add(content);
            }
        }

        for (Content content : document) {
            if (!BODY_CONTENT_NAME.equals(content.getName())) {
                continue;
            }
            try (BufferedReader reader = new BufferedReader(content.getValueAsReader(true))) {
                long row = 0;
                for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                    Document.Builder documentBuilder = document.builder();

                    // A negative limit keeps trailing empty fields, so "a,b,,"
                    // yields four columns and every row has the same column
                    // numbering.
                    int columnNumber = 0;
                    for (String column : line.split(COLUMN_SEPARATOR, -1)) {
                        Content.Builder contentBuilder = content.builder();
                        contentBuilder.setName("@" + columnNumber);
                        contentBuilder.setValueFromString(column);
                        documentBuilder.append(contentBuilder.get());
                        columnNumber++;
                    }
                    for (Content metaContent : metaContents) {
                        documentBuilder.append(metaContent);
                    }

                    String rowSuffix = "?row=" + row;
                    documentBuilder.setGroupID(document.getGroupID() + rowSuffix);
                    documentBuilder.setUrl(document.getUrl() + rowSuffix);
                    documentPublisher.append(documentBuilder.get());
                    status = ConverterBase.ConverterStatus.MODIFIED;
                    row++;
                }
            } catch (IOException e) {
                // Best effort: keep the rows already published and report the
                // read failure rather than aborting the whole conversion.
                e.printStackTrace();
            }
        }
        return status;
    }
}