Skip to content

Commit 6f9efbb

Browse files
authored
Merge pull request #23 from ray310/003
Version 0.0.3
2 parents a9999ba + 7b59950 commit 6f9efbb

21 files changed

+575
-182
lines changed
Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,27 +7,26 @@ jobs:
77
runs-on: ubuntu-latest
88
steps:
99
# Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it
10-
- uses: actions/checkout@v3
10+
- uses: actions/checkout@v4
1111
- uses: psf/black@stable
1212
with:
1313
options: "--check --verbose"
1414
pylint:
1515
runs-on: ubuntu-latest
1616
strategy:
1717
matrix:
18-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
18+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1919
steps:
20-
- uses: actions/checkout@v3
20+
- uses: actions/checkout@v4
2121
- name: Set up Python ${{ matrix.python-version }}
22-
uses: actions/setup-python@v3
22+
uses: actions/setup-python@v5
2323
with:
2424
python-version: ${{ matrix.python-version }}
2525
- name: Install dependencies
2626
run: |
2727
python -m pip install --upgrade pip
28-
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
29-
pip install pylint
28+
python -m pip install .
29+
python -m pip install .[test]
3030
- name: Linting code with pylint
3131
run: |
3232
pylint $(git ls-files '*.py')
33-
Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,22 @@ jobs:
99
fail-fast: false
1010
matrix:
1111
os: [ubuntu-latest, macos-latest, windows-latest]
12-
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
12+
python-version: ["3.9", "3.10", "3.11", "3.12"]
1313
steps:
14-
- uses: actions/checkout@v3
14+
- uses: actions/checkout@v4
1515
with:
1616
lfs: true
1717
- name: Checkout Git-LFS objects
1818
run: git lfs checkout
1919
- name: Set up Python ${{ matrix.python-version }}
20-
uses: actions/setup-python@v3
20+
uses: actions/setup-python@v5
2121
with:
2222
python-version: ${{ matrix.python-version }}
2323
- name: Install dependencies
2424
run: |
2525
python -m pip install --upgrade pip
26-
python -m pip install pytest
27-
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
2826
python -m pip install .
27+
python -m pip install .[test]
2928
- name: Test with pytest
3029
run: |
3130
pytest

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,12 @@
99

1010
# folders
1111
.coverage
12+
.idea
1213
data
1314
dev
1415
dist
1516
htmlcov
1617
__pycache__
18+
19+
# files
20+
.DS_Store

.pre-commit-config.yaml

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
repos:
2+
- repo: https://github.com/pre-commit/pre-commit-hooks
3+
rev: v4.6.0
4+
hooks:
5+
- id: trailing-whitespace
6+
args: [--markdown-linebreak-ext=md]
7+
- id: end-of-file-fixer
8+
- id: check-yaml
9+
- id: check-added-large-files
10+
- id: check-merge-conflict
11+
- id: detect-private-key
12+
- id: mixed-line-ending
13+
- repo: https://github.com/psf/black-pre-commit-mirror
14+
rev: 24.4.2
15+
hooks:
16+
- id: black
17+
language_version: python3.12
18+
args: [--check, --verbose]
19+
- repo: https://github.com/codespell-project/codespell
20+
rev: v2.2.5
21+
hooks:
22+
- id: codespell
23+
- repo: local
24+
hooks:
25+
- id: pylint
26+
name: pylint
27+
entry: pylint
28+
language: python
29+
types: [python]
30+
require_serial: true
31+
stages: [pre-commit, pre-push]
32+
args: ["--verbose"]

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,14 @@
44
### Added
55
- Improved project documentation
66
____
7+
## 0.0.3 - Soon to be released
8+
### Added
9+
- Update for Python 3.12
10+
- Switch project build to pyproject.toml [gh-18](https://github.com/ray310/Panda-Helper/issues/18)
11+
- Simplify import: `import pandahelper` now imports `DataFrameProfile`, `SeriesProfile`, `frequency_table`, and `distribution_stats` [gh-17](https://github.com/ray310/Panda-Helper/issues/17)
12+
- Improved `SeriesProfile` to better handle different data types. [gh-19](https://github.com/ray310/Panda-Helper/issues/19)
13+
- Removed excess trailing whitespace on reports [gh-21](https://github.com/ray310/Panda-Helper/issues/21)
14+
____
715
## 0.0.2 - 2022-06-07
816
### Added
917
- Added improved type-checking for functions and profile classes

MANIFEST.in

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1 @@
11
recursive-include tests *.py *.txt *csv
2-
include requirements.txt

README.md

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
1+
![PyPI - Version](https://img.shields.io/pypi/v/panda-helper)
2+
[![Download Stats](https://img.shields.io/pypi/dm/panda-helper)](https://pypistats.org/packages/panda-helper)
13
![PyPI - Python Version](https://img.shields.io/pypi/pyversions/panda-helper)
2-
![Tests Status](https://github.com/ray310/Panda-Helper/actions/workflows/pytest_old.yml/badge.svg)
3-
![Lint/Format Status](https://github.com/ray310/Panda-Helper/actions/workflows/format_lint_old.yml/badge.svg)
4+
![Tests Status](https://github.com/ray310/Panda-Helper/actions/workflows/pytest.yml/badge.svg)
5+
![Lint/Format Status](https://github.com/ray310/Panda-Helper/actions/workflows/format_lint.yml/badge.svg)
46

57
# Panda-Helper: Quickly and easily inspect data
6-
Panda-Helper is a simple data-profiling utility for Pandas DataFrames and Series
8+
Panda-Helper is a simple data-profiling utility for Pandas DataFrames and Series.
79

8-
Assess data quality and usefulness with minimal effort
10+
Assess data quality and usefulness with minimal effort.
911

10-
Quickly perform initial data exploration, _so you can move on to more in-depth analysis_
12+
Quickly perform initial data exploration, _so you can move on to more in-depth analysis_.
1113

1214
-----
1315
### DataFrame profiles:
@@ -23,7 +25,7 @@ _Vehicles passing through toll stations_
2325
------------------------- ------------
2426
DF Shape (1586280, 6)
2527
Duplicated Rows 2184
26-
28+
2729
Column Name Data Type
2830
-------------------------- -----------
2931
Plaza ID int64
@@ -32,7 +34,7 @@ _Vehicles passing through toll stations_
3234
Direction object
3335
# Vehicles - ETC (E-ZPass) int64
3436
# Vehicles - Cash/VToll int64
35-
37+
3638
Summary of Nulls Per Row
3739
-------------------------- -----------
3840
count 1.58628e+06
@@ -53,12 +55,12 @@ _Vehicles passing through toll stations_
5355

5456
-----
5557
### Series profiles report the:
56-
- Series data type
58+
- Series data type
5759
- Count of non-null values in the series
5860
- Number of unique values
5961
- Count of null values
6062
- Counts and frequency of the most and least common values
61-
- Distribution statistics for numeric data
63+
- Distribution statistics for numeric-like data
6264

6365
__Sample profile of categorical data__<br>
6466
_Direction vehicles are traveling_
@@ -69,7 +71,7 @@ _Direction vehicles are traveling_
6971
Count 1586280
7072
Unique Values 2
7173
Null Values 0
72-
74+
7375
Value Count % of total
7476
------- ------- ------------
7577
I 814100 51.32%
@@ -84,7 +86,7 @@ _Hourly vehicle counts at tolling points_
8486
Count 1586280
8587
Unique Values 8987
8688
Null Values 0
87-
89+
8890
Value Count % of total
8991
------- ------- ------------
9092
0 3137 0.20%
@@ -112,7 +114,7 @@ _Hourly vehicle counts at tolling points_
112114
8876 1 0.00%
113115
8261 1 0.00%
114116
8694 1 0.00%
115-
117+
116118
Statistic Value
117119
------------------------- ---------------
118120
count 1.58628e+06
@@ -141,7 +143,7 @@ __Profiling a DataFrame__<br>
141143
Create the DataFrameProfile and then display it or save the profile.
142144
```python
143145
import pandas as pd
144-
import pandahelper.reports as ph
146+
import pandahelper as ph
145147

146148
data = {
147149
"user_id": [1, 2, 3, 4, 4],
@@ -158,14 +160,14 @@ df_profile
158160
------------------------- ------
159161
DF Shape (5, 4)
160162
Obviously Duplicated Rows 1
161-
163+
162164
Column Name Data Type
163165
------------- -----------
164166
user_id int64
165167
transaction object
166168
amount float64
167169
survey object
168-
170+
169171
Summary of Nulls Per Row
170172
-------------------------- --------
171173
count 5
@@ -183,7 +185,7 @@ df_profile
183185
median absolute deviation 1
184186
standard deviation 0.83666
185187
skew 0.512241
186-
188+
187189
```python
188190
df_profile.save_report("df_profile.txt")
189191
```
@@ -200,13 +202,13 @@ series_profile
200202
Count 4
201203
Unique Values 3
202204
Null Values 1
203-
205+
204206
Value Count % of total
205207
------- ------- ------------
206208
85.12 2 50.00%
207209
100 1 25.00%
208210
1400 1 25.00%
209-
211+
210212
Statistic Value
211213
------------------------- ----------
212214
count 4
@@ -224,7 +226,7 @@ series_profile
224226
median absolute deviation 7.44
225227
standard deviation 654.998
226228
skew 1.99931
227-
229+
228230
```python
229231
series_profile.save_report("amount_profile.txt")
230232
```

conda_environment_dev.yaml

Lines changed: 18 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -3,19 +3,22 @@ channels:
33
- defaults
44
- conda-forge
55
dependencies:
6-
- python=3.9
7-
- black
8-
- build
9-
- coverage
10-
- jupyter
11-
- pandas
12-
- pip
13-
- pydocstyle
14-
- pylint
15-
- pytest
16-
- notebook
17-
- scipy
18-
- sphinx
19-
- twine
6+
- python=3.12
7+
- black=24.4.2
8+
- build=0.7.0
9+
- codespell=0.0.0
10+
- coverage=7.2.2
11+
- jupyter=1.0.0
12+
- notebook=7.0.8
13+
- numpy=1.26.4
14+
- pandas=2.2.2
15+
- pip=24.0
16+
- pre-commit=3.4.0
17+
- pydocstyle=6.3.0
18+
- pylint=3.2.2
19+
- pytest=7.4.4
20+
- scipy=1.13.1
21+
- sphinx=7.3.7
22+
- twine=4.0.2
2023
- pip:
21-
- tabulate
24+
- tabulate==0.9.0

pyproject.toml

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
[build-system]
2+
requires = ["setuptools>=61.0"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "panda-helper"
7+
version = "0.0.3"
8+
dependencies = [
9+
"pandas>=2.0",
10+
"scipy>=1.11",
11+
"tabulate",
12+
]
13+
requires-python = ">=3.9"
14+
authors = [
15+
{name = "Ray310", email = "ray310@pm.me"},
16+
]
17+
readme = "README.md"
18+
description = "Data profiler for Pandas"
19+
keywords = ["data-profiling", "data-profiling-utility", "data-analysis"]
20+
license = {file = "LICENSE"}
21+
classifiers = [
22+
"Development Status :: 3 - Alpha",
23+
"Programming Language :: Python :: 3.9",
24+
"Programming Language :: Python :: 3.10",
25+
"Programming Language :: Python :: 3.11",
26+
"Programming Language :: Python :: 3.12",
27+
"Operating System :: OS Independent",
28+
"Topic :: Software Development",
29+
"Topic :: Scientific/Engineering",
30+
"Topic :: Scientific/Engineering :: Information Analysis",
31+
]
32+
33+
[project.urls]
34+
Homepage = "https://ray310.github.io/Panda-Helper/"
35+
Repository = "https://github.com/ray310/Panda-Helper"
36+
Issues = "https://github.com/ray310/Panda-Helper/issues"
37+
38+
[project.optional-dependencies]
39+
test = ["pytest>=7.4", "pylint>=3.0"]

requirements.txt

-58 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)