📦 Asabeneh / data-analysis-with-python-spring-2025

📄 create_csv.ipynb · 182 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "449a21ed-eb22-4917-96fc-2bb025dd0b0e",
   "metadata": {},
   "outputs": [],
   "source": [
    "# importing necessary packages\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import requests\n",
    "from pprint import pprint"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "be3b1375-a9e8-48a7-a362-c802b60d1fb1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build five related tables (students, instructors, courses, enrollments,\n",
    "# grades) as DataFrames and write each one to ../data/ as a CSV file.\n",
    "# NOTE(review): every to_csv call below relies on the pandas default index=True,\n",
    "# so each file also carries the DataFrame index as an unnamed first column -\n",
    "# confirm what downstream readers expect before changing that.\n",
    "\n",
    "# 1. Students DataFrame (52 rows)\n",
    "# The names are listed once and reused for StudentID, StudentName and Email,\n",
    "# so the three columns cannot drift out of sync.\n",
    "student_names = ['Alice', 'Bob', 'Charlie', 'Diana', 'Eve', 'Frank', 'Grace', 'Henry', 'Isabel', 'Jack',\n",
    "                 'Kelly', 'Liam', 'Mia', 'Noah', 'Olivia', 'Peter', 'Quinn', 'Rose', 'Sam', 'Tina',\n",
    "                 'Uma', 'Victor', 'Wendy', 'Xander', 'Yara', 'Zane', 'Ava', 'Ben', 'Clara', 'Dan',\n",
    "                 'Ella', 'Finn', 'Gina', 'Hank', 'Ivy', 'Jonah', 'Kara', 'Leo', 'Mila', 'Nate',\n",
    "                 'Omar', 'Pia', 'Raul', 'Sara', 'Tom', 'Uma', 'Vera', 'Will', 'Xena', 'Yuri', 'Zoe', 'Adam']\n",
    "students_data = {\n",
    "    # Sequential 1-based IDs, one per name\n",
    "    'StudentID': list(range(1, len(student_names) + 1)),\n",
    "    'StudentName': student_names,\n",
    "    'Age': [20, 21, 19, 22, 23, 20, 21, 19, 22, 20, 24, 18, 19, 22, 20, 21, 23, 20, 19, 22, \n",
    "            21, 20, 24, 18, 19, 22, 20, 21, 23, 20, 19, 22, 21, 20, 24, 18, 19, 22, 20, 21, \n",
    "            23, 20, 19, 22, 21, 20, 24, 18, 19, 22, 20, 21],\n",
    "    # 'Uma' occurs twice (StudentID 21 and 46); those two addresses get the\n",
    "    # 1-based StudentID appended so they stay distinct.\n",
    "    'Email': [f'{name.lower()}@school.com' if name != 'Uma' else f'{name.lower()}{i}@school.com'\n",
    "              for i, name in enumerate(student_names, 1)],\n",
    "    'EnrollmentDate': ['2023-09-01', '2023-09-01', '2024-01-15', '2022-09-01', '2023-01-10', '2024-02-01', '2023-09-15', '2024-03-01', '2023-08-20', '2024-01-10',\n",
    "                       '2022-08-15', '2024-09-01', '2023-09-10', '2022-10-01', '2024-02-15', '2023-08-25', '2023-01-20', '2024-03-10', '2023-09-05', '2022-11-01',\n",
    "                       '2024-01-25', '2023-08-15', '2022-09-20', '2024-09-05', '2023-10-01', '2022-12-01', '2024-02-20', '2023-09-25', '2023-02-01', '2024-03-15',\n",
    "                       '2023-10-10', '2022-11-15', '2024-02-01', '2023-08-30', '2022-10-05', '2024-09-10', '2023-11-01', '2022-12-15', '2024-03-01', '2023-09-30',\n",
    "                       '2023-02-15', '2024-03-20', '2023-11-10', '2022-12-20', '2024-02-10', '2023-09-15', '2022-10-15', '2024-09-15', '2023-11-20', '2022-12-25',\n",
    "                       '2024-03-05', '2023-10-05'],\n",
    "    'Major': ['Computer Science', 'Biology', 'History', 'Art', 'Mathematics', 'Physics', 'Chemistry', 'English', 'Psychology', 'Economics',\n",
    "              'Sociology', 'Undecided', 'Computer Science', 'Biology', 'History', 'Art', 'Mathematics', 'Physics', 'Chemistry', 'English',\n",
    "              'Psychology', 'Economics', 'Sociology', 'Undecided', 'Computer Science', 'Biology', 'History', 'Art', 'Mathematics', 'Physics',\n",
    "              'Chemistry', 'English', 'Psychology', 'Economics', 'Sociology', 'Undecided', 'Computer Science', 'Biology', 'History', 'Art',\n",
    "              'Mathematics', 'Physics', 'Chemistry', 'English', 'Psychology', 'Economics', 'Sociology', 'Undecided', 'Computer Science', 'Biology',\n",
    "              'History', 'Art']\n",
    "}\n",
    "students_df = pd.DataFrame(students_data)\n",
    "students_df.to_csv('../data/students.csv')\n",
    "\n",
    "# 2. Instructors DataFrame (10 rows)\n",
    "instructors_data = {\n",
    "    'InstructorID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],\n",
    "    'InstructorName': ['Dr. Smith', 'Prof. Jones', 'Dr. Lee', 'Ms. Taylor', 'Dr. Brown', 'Prof. Adams', 'Ms. Carter', 'Mr. Wilson', 'Dr. Evans', 'Prof. Patel'],\n",
    "    'Email': ['smith@school.com', 'jones@school.com', 'lee@school.com', 'taylor@school.com', 'brown@school.com', \n",
    "              'adams@school.com', 'carter@school.com', 'wilson@school.com', 'evans@school.com', 'patel@school.com'],\n",
    "    'Department': ['Mathematics', 'Science', 'History', 'Fine Arts', 'Physics', 'Science', 'English', 'Computer Science', 'Psychology', 'Economics'],\n",
    "    'HireDate': ['2018-06-01', '2019-08-15', '2020-01-10', '2021-03-20', '2017-09-01', '2018-11-05', '2022-02-15', '2020-07-01', '2019-04-10', '2021-09-01']\n",
    "}\n",
    "instructors_df = pd.DataFrame(instructors_data)\n",
    "instructors_df.to_csv('../data/instructors.csv')\n",
    "\n",
    "# 3. Courses DataFrame (12 rows)\n",
    "courses_data = {\n",
    "    'CourseID': [101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112],\n",
    "    'CourseName': ['Math', 'Science', 'History', 'Art', 'Physics', 'Chemistry', 'English Lit', 'Programming', 'Psychology', 'Economics', 'Sociology', 'Statistics'],\n",
    "    'Credits': [3, 4, 3, 2, 4, 3, 3, 4, 3, 3, 3, 4],\n",
    "    # Courses 111 and 112 have no instructor; the None entries make pandas store\n",
    "    # this column as float (NaN for the gaps), so the CSV shows 1.0, 2.0, ...\n",
    "    'InstructorID': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, None],\n",
    "    'StartDate': ['2025-01-10', '2025-01-10', '2025-01-15', '2025-02-01', '2025-01-20', '2025-01-20', '2025-02-10', '2025-02-15', '2025-01-25', '2025-02-05', '2025-03-01', '2025-03-10'],\n",
    "    'Department': ['Mathematics', 'Science', 'History', 'Fine Arts', 'Physics', 'Science', 'English', 'Computer Science', 'Psychology', 'Economics', 'Sociology', 'Mathematics']\n",
    "}\n",
    "courses_df = pd.DataFrame(courses_data)\n",
    "courses_df.to_csv('../data/courses.csv')\n",
    "\n",
    "# 4. Enrollments DataFrame (60 rows)\n",
    "# Each row pairs one StudentID with one CourseID under a sequential EnrollmentID.\n",
    "enrollments_data = {\n",
    "    'EnrollmentID': list(range(1, 61)),\n",
    "    'StudentID': [1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 9, 9, 10, 10, 11, 11, 3,\n",
    "                  13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 28, 29,\n",
    "                  30, 31, 32, 33, 34, 35, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46,\n",
    "                  47, 49, 50, 51, 52, 13, 25, 37],\n",
    "    'CourseID': [101, 108, 102, 105, 103, 104, 101, 106, 105, 102, 106, 102, 107, 109, 107, 110, 101, 111, 109, 107,\n",
    "                 108, 102, 103, 104, 101, 105, 106, 107, 109, 110, 111, 108, 102, 103, 104, 101, 105, 106, 107, 109,\n",
    "                 110, 111, 108, 102, 103, 104, 101, 105, 106, 107, 109, 110, 111, 108, 102, 103, 104, 101, 101, 101]\n",
    "}\n",
    "enrollments_df = pd.DataFrame(enrollments_data)\n",
    "enrollments_df.to_csv('../data/enrollments.csv')\n",
    "\n",
    "# 5. Grades DataFrame (60 rows)\n",
    "# One grade row per EnrollmentID (1-60). Grade and GradeDate are None at the\n",
    "# same positions, so a missing grade always has a missing date as well.\n",
    "grades_data = {\n",
    "    'GradeID': list(range(1, 61)),\n",
    "    'EnrollmentID': list(range(1, 61)),\n",
    "    'Grade': ['A', 'A+', 'B+', 'B', 'A-', None, 'B', 'C+', 'A', 'B+', 'B-', None, None, 'A', 'B+', 'B', 'C', 'A-', 'B', None,\n",
    "              'A', 'B+', 'A-', None, 'B', 'A', 'C+', None, 'A', 'B', 'A-', 'A+', 'B+', 'A-', None, 'B', 'A', 'C+', None, 'A',\n",
    "              'B', 'A-', 'A+', 'B+', 'A-', None, 'B', 'A', 'C+', None, 'A', 'B', 'A-', 'A+', 'B+', 'A-', None, 'B', 'A', 'C+'],\n",
    "    'GradeDate': ['2025-03-15', '2025-04-01', '2025-03-20', '2025-03-25', '2025-03-18', None, '2025-03-15', '2025-03-22', '2025-03-25', '2025-03-20',\n",
    "                  '2025-03-22', None, None, '2025-03-28', '2025-04-05', '2025-04-10', '2025-03-15', '2025-04-15', '2025-03-28', None,\n",
    "                  '2025-04-01', '2025-03-20', '2025-03-18', None, '2025-03-15', '2025-03-25', '2025-03-22', None, '2025-03-28', '2025-04-10',\n",
    "                  '2025-04-15', '2025-04-01', '2025-03-20', '2025-03-18', None, '2025-03-15', '2025-03-25', '2025-03-22', None, '2025-03-28',\n",
    "                  '2025-04-10', '2025-04-15', '2025-04-01', '2025-03-20', '2025-03-18', None, '2025-03-15', '2025-03-25', '2025-03-22', None,\n",
    "                  '2025-03-28', '2025-04-10', '2025-04-15', '2025-04-01', '2025-03-20', '2025-03-18', None, '2025-03-15', '2025-03-25', '2025-03-22']\n",
    "}\n",
    "grades_df = pd.DataFrame(grades_data)\n",
    "grades_df.to_csv('../data/grades.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "eada6748-d408-440f-b490-df7a2e8432d4",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "48e79d7f-293c-4e33-81a8-faf2b1b7555d",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "f926e4b9-332e-41cc-ab57-51a990e5d85e",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "0a2ee88b-8a57-4eec-905c-cc8ddffabe4c",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "dd74d3f6-c62c-44ba-bdbe-38ad664cfeda",
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f498088-6821-4f99-851b-2d9e7d7f1627",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.12.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}