|
15 | 15 | # pylint: disable=C1801 |
16 | 16 |
|
17 | 17 |
|
def df_apply(data: pd.DataFrame, func, axis: int = 1) -> pd.DataFrame:
    """
    Apply ``func`` to a DataFrame, passing each column value as its own argument.

    This is a thin wrapper around ``pd.DataFrame.apply``: instead of handing
    ``func`` a single Series, every column label of ``data`` is looked up in
    that Series and the resulting values are passed positionally, in column
    order. It works for any number of columns (the previous implementation
    supported only 2 or 3).

    Parameters
    ----------
    data : pd.DataFrame
        The data on which ``func`` will be applied. ``func`` must accept as
        many positional arguments as ``data`` has columns.
    func : callable
        Function called as ``func(value_of_col_0, value_of_col_1, ...)``.
    axis : {0 or 'index', 1 or 'columns'}, default=1
        Axis passed through to ``pd.DataFrame.apply``. With the default
        ``axis=1`` the function is called once per row. NOTE: values are
        always looked up by *column label*, so ``axis=0`` only makes sense
        when the row index contains those labels as well.

    Returns
    -------
    pd.DataFrame or pd.Series
        Whatever ``pd.DataFrame.apply`` builds from the values returned by
        ``func`` (a Series for scalar results, a DataFrame when ``func``
        returns a Series).

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({"name": ["a", "b"], "brand": ["x", "y"]})
    >>> df_apply(df, lambda name, brand: name + brand).tolist()
    ['ax', 'by']

    The function can also be attached to ``DataFrame`` and used as a method:

    >>> pd.DataFrame.my_apply = df_apply
    >>> df[["name", "brand"]].my_apply(lambda name, brand: name + brand).tolist()
    ['ax', 'by']
    """
    # Snapshot the labels once so the lambda does not re-read data.columns
    # on every call.
    columns = list(data.columns)

    # Unpack every column value positionally; for 2 or 3 columns this is
    # exactly what the former hard-coded branches did.
    return data.apply(
        lambda row: func(*(row[col] for col in columns)), axis=axis
    )
| 52 | + |
| 53 | + |
18 | 54 | class Finder: |
19 | 55 | """ |
20 | 56 | Search and recognize the name, category and brand of a product |
@@ -63,6 +99,7 @@ class Finder: |
63 | 99 | def __init__(self, pathes: Optional[Dict[str, str]] = None): |
64 | 100 | pathes = pathes or {} |
65 | 101 | self.mystem = Mystem() |
| 102 | + pd.DataFrame.appl = df_apply |
66 | 103 |
|
67 | 104 | # Init model: |
68 | 105 | model_params = {"num_class": 21, "embed_dim": 50, "vocab_size": 500} |
@@ -308,60 +345,48 @@ def __find_all(self, verbose: int) -> None: |
308 | 345 | self.__print_logs("Before:", verbose) |
309 | 346 |
|
310 | 347 | # Find brands: |
311 | | - self.data[["name_norm", "brand_norm"]] = self.data.apply( |
312 | | - lambda x: self.find_brands(x["name_norm"], x["brand_norm"]), axis=1 |
313 | | - ) |
| 348 | + self.data[["name_norm", "brand_norm"]] = self.data[ |
| 349 | + ["name_norm", "brand_norm"] |
| 350 | + ].appl(self.find_brands) |
314 | 351 | self.__print_logs("Find brands:", verbose) |
315 | 352 |
|
316 | 353 | # Find product and category: |
317 | | - self.data[["name_norm", "product_norm", "cat_norm"]] = self.data.apply( |
318 | | - lambda x: self.find_product(x["name_norm"], x["product_norm"]), axis=1 |
319 | | - ) |
| 354 | + self.data[["name_norm", "product_norm", "cat_norm"]] = self.data[ |
| 355 | + ["name_norm", "product_norm"] |
| 356 | + ].appl(self.find_product) |
320 | 357 | self.__print_logs("Find product and category:", verbose) |
321 | 358 |
|
322 | 359 | # Remove `-`: |
323 | 360 | self.data["name_norm"] = self.data["name_norm"].str.replace("-", " ") |
324 | | - self.data[["name_norm", "product_norm", "cat_norm"]] = self.data.apply( |
325 | | - lambda x: self.find_product( |
326 | | - x["name_norm"], x["product_norm"], x["cat_norm"] |
327 | | - ), |
328 | | - axis=1, |
329 | | - ) |
| 361 | + self.data[["name_norm", "product_norm", "cat_norm"]] = self.data[ |
| 362 | + ["name_norm", "product_norm", "cat_norm"] |
| 363 | + ].appl(self.find_product) |
330 | 364 | self.__print_logs( |
331 | 365 | "Remove `-` and the second attempt to find a product:", verbose |
332 | 366 | ) |
333 | 367 |
|
334 | 368 | # Use Mystem: |
335 | | - self.data["name_norm"] = self.data.apply( |
336 | | - lambda x: self._use_mystem(x["name_norm"], x["product_norm"]), axis=1 |
337 | | - ) |
338 | | - self.data[["name_norm", "product_norm", "cat_norm"]] = self.data.apply( |
339 | | - lambda x: self.find_product( |
340 | | - x["name_norm"], x["product_norm"], x["cat_norm"] |
341 | | - ), |
342 | | - axis=1, |
| 369 | + self.data["name_norm"] = self.data[["name_norm", "product_norm"]].appl( |
| 370 | + self._use_mystem |
343 | 371 | ) |
| 372 | + self.data[["name_norm", "product_norm", "cat_norm"]] = self.data[ |
| 373 | + ["name_norm", "product_norm", "cat_norm"] |
| 374 | + ].appl(self.find_product) |
344 | 375 | self.__print_logs( |
345 | 376 | "Use Mystem for lemmatization and the third attempt to find a product:", |
346 | 377 | verbose, |
347 | 378 | ) |
348 | 379 |
|
349 | 380 | # Find category: |
350 | | - self.data[["product_norm", "cat_norm"]] = self.data.apply( |
351 | | - lambda x: self.find_category( |
352 | | - x["name_norm"], x["product_norm"], x["cat_norm"] |
353 | | - ), |
354 | | - axis=1, |
355 | | - ) |
| 381 | + self.data[["product_norm", "cat_norm"]] = self.data[ |
| 382 | + ["name_norm", "product_norm", "cat_norm"] |
| 383 | + ].appl(self.find_category) |
356 | 384 | self.__print_logs("Find the remaining categories:", verbose) |
357 | 385 |
|
358 | 386 | # Find product by brand: |
359 | | - self.data[["product_norm", "brand_norm", "cat_norm"]] = self.data.apply( |
360 | | - lambda x: self.find_product_by_brand( |
361 | | - x["product_norm"], x["brand_norm"], x["cat_norm"] |
362 | | - ), |
363 | | - axis=1, |
364 | | - ) |
| 387 | + self.data[["product_norm", "brand_norm", "cat_norm"]] = self.data[ |
| 388 | + ["product_norm", "brand_norm", "cat_norm"] |
| 389 | + ].appl(self.find_product_by_brand) |
365 | 390 | self.__print_logs("Find product by brand:", verbose) |
366 | 391 |
|
367 | 392 | def find_all( |
|
0 commit comments