Skip to content

Commit fbfa9b5

Browse files
authored
Merge pull request #24 from OpenIsraeliSupermarkets/v0.1.8
V0.1.8
2 parents 614efec + fabe89a commit fbfa9b5

File tree

5 files changed

+31
-9
lines changed

5 files changed

+31
-9
lines changed

il_supermarket_parsers/documents/xml_dataframe_parser.py

+2
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ class XmlDataFrameConverter(BaseXMLParser):
1212

1313
def reduce_size(self, data):
1414
"""reduce the size"""
15+
data = data.fillna("")
16+
# remove duplicate columns
1517
for col in data.columns:
1618
data[col] = data[col].mask(data[col] == data[col].shift())
1719
return data

il_supermarket_parsers/multiprocess_pharser.py

+19-7
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import itertools
22
import json
3+
import datetime
34
import os
5+
import pytz
46
from .raw_parsing_pipeline import RawParsingPipeline
57
from .utils.multi_processing import MultiProcessor, ProcessJob
68
from .parser_factory import ParserFactory
@@ -20,9 +22,10 @@ def job(self, **kwargs):
2022
parser_name = kwargs.pop("store_enum")
2123
output_folder = kwargs.pop("output_folder")
2224
limit = kwargs.pop("limit")
25+
when_date = kwargs.pop("when_date")
2326

2427
return RawParsingPipeline(
25-
drop_folder, parser_name, file_type, output_folder
28+
drop_folder, parser_name, file_type, output_folder, when_date
2629
).process(limit=limit)
2730

2831

@@ -36,12 +39,14 @@ def __init__(
3639
enabled_file_types=None,
3740
multiprocessing=6,
3841
output_folder="output",
42+
when_date=datetime.datetime.now(pytz.timezone("Asia/Jerusalem")),
3943
):
4044
super().__init__(multiprocessing=multiprocessing)
4145
self.data_folder = data_folder
4246
self.enabled_parsers = enabled_parsers
4347
self.enabled_file_types = enabled_file_types
4448
self.output_folder = output_folder
49+
self.when_date = when_date
4550

4651
def task_to_execute(self):
4752
"""the task to execute"""
@@ -67,6 +72,7 @@ def get_arguments_list(self, limit=None):
6772
"file_type",
6873
"data_folder",
6974
"output_folder",
75+
"when_date",
7076
]
7177
combinations = list(
7278
itertools.product(
@@ -75,6 +81,7 @@ def get_arguments_list(self, limit=None):
7581
all_file_types,
7682
[self.data_folder],
7783
[self.output_folder],
84+
[self.when_date.strftime("%Y-%m-%d %H:%M:%S %z")]
7885
)
7986
)
8087
task_can_executed_independently = [
@@ -84,10 +91,15 @@ def get_arguments_list(self, limit=None):
8491

8592
def post(self, results):
8693
"""post process the results"""
87-
with open(
88-
os.path.join(self.output_folder, "parser-status.json"),
89-
"w",
90-
encoding="utf-8",
91-
) as file:
92-
json.dump(results, file)
94+
status_file = os.path.join(self.output_folder, "parser-status.json")
95+
if os.path.exists(status_file):
96+
with open(status_file, "r", encoding="utf-8") as file:
97+
existing_results = json.load(file)
98+
else:
99+
existing_results = []
100+
101+
existing_results.extend(results)
102+
103+
with open(status_file, "w", encoding="utf-8") as file:
104+
json.dump(existing_results, file)
93105
return super().post(results)

il_supermarket_parsers/raw_parsing_pipeline.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -14,11 +14,12 @@ class RawParsingPipeline:
1414
processing files to dataframe
1515
"""
1616

17-
def __init__(self, folder, store_name, file_type, output_folder) -> None:
17+
def __init__(self, folder, store_name, file_type, output_folder, when_date) -> None:
1818
self.store_name = store_name
1919
self.file_type = file_type
2020
self.folder = folder
2121
self.output_folder = output_folder
22+
self.when_date = when_date
2223

2324
def append_columns_to_csv(self, existing_file, new_columns):
2425
"""Append new columns to an existing CSV file"""
@@ -125,6 +126,7 @@ def process(self, limit=None):
125126
"status": True,
126127
"store_name": self.store_name,
127128
"files_types": self.file_type,
129+
"when_date": self.when_date,
128130
"processed_files": len(files_to_process) > 0,
129131
"execution_errors": execution_errors > 0,
130132
"file_was_created": os.path.exists(create_csv),

il_supermarket_parsers/task.py

+6
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
1+
import datetime
2+
import pytz
13
from .multiprocess_pharser import ParallelParser
24
from .utils.logger import Logger
35

46

7+
58
class ConvertingTask:
69
"""main convert task"""
710

@@ -12,6 +15,7 @@ def __init__(
1215
files_types=None,
1316
multiprocessing=6,
1417
limit=None,
18+
when_date=datetime.datetime.now(pytz.timezone("Asia/Jerusalem")),
1519
output_folder="outputs",
1620
):
1721
Logger.info(
@@ -21,13 +25,15 @@ def __init__(
2125
f"files_types = {files_types}"
2226
f"output_folder={output_folder}"
2327
f"limit={limit}"
28+
f"when_date={when_date}"
2429
)
2530
self.runner = ParallelParser(
2631
data_folder,
2732
enabled_parsers=enabled_parsers,
2833
enabled_file_types=files_types,
2934
multiprocessing=multiprocessing,
3035
output_folder=output_folder,
36+
when_date=when_date,
3137
)
3238
self.limit = limit
3339

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@
3131
tests_require=dev_required,
3232
extras_require={"test": ["pytest", "pytest-xdist"]},
3333
# *strongly* suggested for sharing
34-
version="0.1.7",
34+
version="0.1.8",
3535
# The license can be anything you like
3636
license="MIT",
3737
description="python package that process the data dumped by the israeli supermarket",

0 commit comments

Comments (0)